Example #1
def replace_links_with_text(html):
    """any absolute links will be replaced with the
    url in plain text, same with any img tags
    """
    soup = BeautifulSoup(html, 'html5lib')
    abs_url_re = r'^http(s)?://'

    images = soup.find_all('img')
    for image in images:
        url = image.get('src', '')
        text = image.get('alt', '')
        if url == '' or re.match(abs_url_re, url):
            image.replaceWith(format_url_replacement(url, text))

    links = soup.find_all('a')
    for link in links:
        url = link.get('href', '')
        text = ''.join(link.text) or ''

        if text == '':  # this is due to an issue with url inlining in comments
            link.replaceWith('')
        elif url == '' or re.match(abs_url_re, url):
            link.replaceWith(format_url_replacement(url, text))

    return force_text(soup.find('body').renderContents(), 'utf-8')
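The helpers format_url_replacement and force_text used above are not shown. A minimal sketch of what format_url_replacement might look like, assuming it simply renders a link or image as plain text (the name appears in the original, but this body is an assumption):

def format_url_replacement(url, text):
    # Assumed behavior: combine the visible text and the URL into one plain-text string.
    url = url.strip()
    text = text.strip()
    if text and url and text != url:
        return '%s (%s)' % (text, url)
    return text or url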
Example #2
def process_line_exceptions(line, extra_tags):
    global except_base_tag

    if not ' ' in line or re.match('.*[а-яіїєґ]/.*', line):
      return line
    if re.match('^[^ ]+ [^ ]+ [^:]?[a-z].*$', line):
      return line

    if line.startswith('# !'):
      except_base_tag = re.findall('![a-z:-]+', line)[0][1:] + ':'
      return ''
    
    base = re.findall('^[^ ]+', line)[0]
    
    except_base_tag2 = except_base_tag
    if base.endswith('ся'):
        except_base_tag2 = except_base_tag.replace('verb:', 'verb:rev:')
      
    out_line = re.sub('([^ ]+) ?', '\\1 ' + base + ' ' + except_base_tag2 + 'unknown' + extra_tags + '\n', line)
    
    if except_base_tag in ('verb:imperf:', 'verb:perf:'):
      base_add = 'inf:'
#      if base.endswith('ся'):
#        base_add = 'rev:' + base_add
      out_line = re.sub("(verb:(?:rev:)?)((im)?perf:)", "\\1inf:\\2", out_line, 1)
      
      out_lines = out_line.split('\n')
      out_lines[0] = out_lines[0].replace(':unknown', '')
      out_line = '\n'.join(out_lines)
    
    return out_line[:-1]
Example #3
def create_filetree(path=None, depth=0, max_depth=0):

    tree = None

    if max_depth == 0 or depth < max_depth:
        if path is None:
            path = os.getcwd()

        tree = dict(name=os.path.basename(path), children=[])

        try:
            lst = os.listdir(path)
        except OSError:
            pass  # ignore errors
        else:
            for name in lst:
                fn = os.path.join(path, name)
                if (os.path.isdir(fn) and
                        re.match('^.*(Compiled)$', fn) is None):
                    child = create_filetree(fn, depth + 1, max_depth)
                    if child is not None:
                        tree['children'].append(child)
                elif re.match('^.*\.(m|def|txt|csv)$', fn) is not None:
                    tree['children'].append(dict(name=fn.replace(
                        os.getcwd() + os.path.sep, "")))

    return tree
Example #4
    def _sanitize(self, badKey, badVal):
        valid = True

        # Used for debugging
        if 'csv_line' not in self:
            self['csv_line'] = "-1"

        # Catch bad formatting
        if badKey in self:
            logging.debug(badKey, ''.join(self[badKey]))
            logging.debug("Bad Key")
            valid = False

        if 'last_pymnt_d' in self and re.match("^\s*$", self['last_pymnt_d']):
            if 'issue_d' in self:
                # If no payment received, last payment date = issue date
                self['last_pymnt_d'] = self['issue_d']

        for k, v in self.items():
            if badVal == v:
                logging.debug(badVal)
                valid = False
                break

            # Replace empties with 0s
            if re.match('^\s*$', str(v)):
                self[k] = 0

        if not valid:
            logging.debug(self.items())
            # Can't safely access specific keys, other than id, when incorrectly formatted
            logging.warning("Fix Loan {}".format(self['id']))
            logging.warning("Line {}".format(self['csv_line']))

        return valid
Example #5
def process_line(line, extra_tags):
  line = re.sub(' *#.*$', '', line) # remove comments

  line = re.sub('-$', '', line)

  if not ' ' in line or re.match('.*[а-яіїєґ]/.*', line):
    out_line = line
  elif re.match('^[^ ]+ [^ ]+ [^:]?[a-z].*$', line):
    out_line = line
  elif re.match('^[^ ]+ [:^<a-z0-9_].*$', line):
    out_line = re.sub('^([^ ]+) ([^<a-z].*)$', '\\1 \\1 \\2', line)
  else:
    print('hit-', line, file=sys.stderr)
    base = re.findall('^[^ ]+', line)[0]
    out_line = re.sub('([^ ]+) ?', '\\1 ' + base + ' unknown' + extra_tags + '\n', line)
    return out_line[:-1]

#  if extra_tags != '' and not re.match('.* [a-z].*$', out_line):
  if extra_tags != '' and (not ' ' in out_line or ' ^' in out_line):
    extra_tags = ' ' + extra_tags
    
  if '|' in out_line:
    out_line = out_line.replace('|', extra_tags + '|')

#  if not "/" in out_line and not re.match("^[^ ]+ [^ ]+ [^ ]+$", out_line + extra_tags):
#    print("bad line:", out_line + extra_tags, file=sys.stderr)

#  if len(out_line)> 100:
#      print(out_line, file=sys.stderr)
#      sys.exit(1)

  return out_line + extra_tags
Example #6
    def __init__(self, host, debugfunc=None):
        if isinstance(host, types.TupleType):
            host, self.weight = host
        else:
            self.weight = 1

        #  parse the connection string
        m = re.match(r'^(?P<proto>unix):(?P<path>.*)$', host)
        if not m:
            m = re.match(r'^(?P<proto>inet):'
                    r'(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host)
        if not m: m = re.match(r'^(?P<host>[^:]+):(?P<port>[0-9]+)$', host)
        if not m:
            raise ValueError('Unable to parse connection string: "%s"' % host)

        hostData = m.groupdict()
        if hostData.get('proto') == 'unix':
            self.family = socket.AF_UNIX
            self.address = hostData['path']
        else:
            self.family = socket.AF_INET
            self.ip = hostData['host']
            self.port = int(hostData.get('port', 11211))
            self.address = ( self.ip, self.port )

        if not debugfunc:
            debugfunc = lambda x: x
        self.debuglog = debugfunc

        self.deaduntil = 0
        self.socket = None

        self.buffer = ''
Example #7
def register(request):
  '''
  Handle a Post request with the following information:
  login, password, email
  '''
  print 'receiving a request'
  #parameter retrieval
  try :
    login = request.GET['registerLogin']
    password = request.GET['registerPassword']
    email = request.GET['registerEmail']
  except MultiValueDictKeyError :
    response=HttpResponse('400 - BAD URI')
    response.status_code=400
    return response
  
  #parameter validation
  loginIsValid = re.match('[\w0-9]*', login) and len(login) > 3 and len(login) < 16
  passwordIsValid = len(password) >= 6 
  #TODO check with number
  emailIsValid = re.match('[\w.]*@\w*\.[\w.]*', email)
  
  logger.info(login + ' ' + password + ' ' + email)
  
  if loginIsValid and passwordIsValid and emailIsValid :
     return processFormInformation(login, password, email, request)   
  else :
    response=HttpResponse("400")
    response['message'] = 'invalid information'
    response.status_code=400
    return response
Example #8
  def _apache_index(self, url):
    r = requests.get(url)
    if r.status_code != 200:
      raise ValueError(url+" status:"+str(r.status_code))
    r.dirs = []
    r.files = []
    for l in r.content.split("\n"):
      # '<img src="/icons/folder.png" alt="[DIR]" /> <a href="7.0/">7.0/</a>       03-Dec-2014 19:57    -   '
      # ''<img src="/icons/tgz.png" alt="[   ]" /> <a href="owncloud_7.0.4-2.diff.gz">owncloud_7.0.4-2.diff.gz</a>                     09-Dec-2014 16:53  9.7K   <a href="owncloud_7.0.4-2.diff.gz.mirrorlist">Details</a>'
      # 
      m = re.search("<a\s+href=[\"']?([^>]+?)[\"']?>([^<]+?)[\"']?</a>\s*([^<]*)", l, re.I)
      if m:
        # ('owncloud_7.0.4-2.diff.gz', 'owncloud_7.0.4-2.diff.gz', '09-Dec-2014 16:53  9.7K   ')
        m1, m2, m3 = m.groups()

        if re.match("(/|\?|\w+://)", m1):  # skip absolute urls, query strings and foreign urls
          continue
        if re.match("\.?\./?$", m1):  # skip . and ..
          continue

        m3 = re.sub("[\s-]+$", "", m3)
        if re.search("/$", m1):
          r.dirs.append([m1, m3])
        else:
          r.files.append([m1, m3])
    return r
Example #9
    def processAux(self, dFrag):
        self.depth=self.depth+1
        if not self.files.has_key(self.depth):
            self.files[self.depth]=[]
        thisDir=self.compoundDir(self.topDir, dFrag)
        os.chdir(thisDir)
        self.theDict[thisDir]={'xml': [], 'bin': [], 'dir': []}
        # print "Processing",thisDir," Depth",self.depth
        thisDirContents=os.listdir(thisDir)
        for fname in thisDirContents:
            if stat.S_ISDIR(os.stat(fname)[stat.ST_MODE]):
                if not re.match("^(CVS|images|search|photos|htdig|\.)", fname) and self.depth<4:
                    self.processAux(self.compoundDir(dFrag,fname))
                    self.handleDir(thisDir, fname)
                    os.chdir(thisDir)
            else:
                # print "File",fname
                if re.match(".*\.xml$", fname):
                    self.handleXML(thisDir, dFrag, fname)
                elif re.match(".*\.(jpe?g|JPG|gif|png|html)$",
                              fname):
                    self.handleBinary(thisDir, fname)

        self.writeIndex(dFrag)
        self.depth=self.depth-1
Example #10
    def __init__(self, filename):
        self.name = "YNAB" 
        self.transactions = []

        with open(filename) as register:
            dr = csv.DictReader(register)
            for row in dr:
                trans = self._process_row(row)
                while True:  # Merge split transactions into a single transaction
                    regex = r'\(Split ([0-9]+)/([0-9]+)\)'
                    match = re.match(regex, row["Memo"])
                    if not match:
                        break

                    for split_row in dr:
                        match = re.match(regex, split_row["Memo"])
                        t = self._process_row(split_row)
                        trans.amount += t.amount

                        current_split = match.group(1)
                        max_splits = match.group(2)
                        if current_split == max_splits:
                            break
                    break

                trans.amount = round(trans.amount, 2)  # Round to fix floating-point errors from summing amounts imported from Mint.
                self.transactions.append(trans)

        self.transactions.sort()
Example #11
def main():    
    f = open('4_dataset.txt', 'r')
    x = f.readlines()
    
    for line in x:
        if re.match('a={(.*)}', line):
            a = re.match('a={(.*)}', line).group(1).split(',')
        elif re.match('b={(.*)}', line):
            b = re.match('b={(.*)}', line).group(1).split(',')
    
    f00 = f01 = f10 = f11 = 0
    print 'a =', [ int(i) for i in a ]
    print 'b =', [ int(i) for i in b ]
    
    for i in zip(a, b):
        if i == ('0', '0'):
            f00 += 1
        if i == ('0', '1'):
            f01 += 1
        if i == ('1', '0'):
            f10 += 1
        if i == ('1', '1'):
            f11 += 1
    
    print 'Similarity Coeff =', float(f00 + f11)/(f00 + f01 + f10 + f11)
    print 'Jaccard Coeff =', f11/float(f01 + f10 + f11)
Example #12
def checkInCNAME(node_text, nodes):
	try:
		InCNAME = re.search("IN CNAME (.*)", node_text)
		alias = InCNAME.group(0).split("IN CNAME ")[1]
		#IP address found
		if re.match("(\d{1,3}\.)", alias):
			return alias
		# cname is a subdomain
		elif re.match(".*[a-x]\.", alias):
			return ("subdomain found (" + alias + ")")
		#cname is another cname
		else:
			try:
				alias_name = dns.name.Name([alias])
				alias_IP = nodes[alias_name].to_text(alias_name)
				checkCname = checkInA(alias_IP)
				if checkCname is None:
					return checkInCNAME(alias_IP, nodes)
				else:
					return checkCname
			except:
				return (Fore.RED + "unknown host (" + alias + ")" + Fore.RESET)
	# node has no IN CNAME
	except:
		return None
Example #13
    def process_isolation_file(self, sql_file, output_file):
        """
            Processes the given sql file and writes the output
            to output file
        """
        try:
            command = ""
            for line in sql_file:
                tinctest.logger.info("re.match: %s" % re.match(r"^\d+[q\\<]:$", line))
                print >> output_file, line.strip(),
                (command_part, dummy, comment) = line.partition("--")
                if command_part == "" or command_part == "\n":
                    print >> output_file
                elif command_part.endswith(";\n") or re.match(r"^\d+[q\\<]:$", line):
                    command += command_part
                    tinctest.logger.info("Processing command: %s" % command)
                    self.process_command(command, output_file)
                    command = ""
                else:
                    command += command_part

            for process in self.processes.values():
                process.stop()
        except:
            for process in self.processes.values():
                process.terminate()
            raise
        finally:
            for process in self.processes.values():
                process.terminate()
Example #14
def area_code_lookup(request, area_id, format):
    from mapit.models import Area, CodeType
    area_code = None
    if re.match('\d\d([A-Z]{2}|[A-Z]{4}|[A-Z]{2}\d\d\d|[A-Z]|[A-Z]\d\d)$', area_id):
        area_code = CodeType.objects.get(code='ons')
    elif re.match('[EW]0[12]\d{6}$', area_id): # LSOA/MSOA have ONS code type
        area_code = CodeType.objects.get(code='ons')
    elif re.match('[ENSW]\d{8}$', area_id):
        area_code = CodeType.objects.get(code='gss')
    if not area_code:
        return None

    args = { 'format': format, 'codes__type': area_code, 'codes__code': area_id }
    if re.match('[EW]01', area_id):
        args['type__code'] = 'OLF'
    elif re.match('[EW]02', area_id):
        args['type__code'] = 'OMF'

    area = get_object_or_404(Area, **args)
    path = '/area/%d%s' % (area.id, '.%s' % format if format else '')
    # If there was a query string, make sure it's passed on in the
    # redirect:
    if request.META['QUERY_STRING']:
        path += "?" + request.META['QUERY_STRING']
    return HttpResponseRedirect(path)
Example #15
    def __load_book_menu(self, lines):
        r1 = re.compile(u'^\s*目\s*录\s*$')
        r2 = re.compile(u'^\s*([^·…]+)\s*[·.…]{2,}\s*([l\d]+)\s*$')
        menus = {}
        start = False
        not_match = 0
        for line in lines:
            words = line.decode(self.default_coding)
            words = words.strip('\n')
            if re.match(r1, words):
                start = True
                continue
            elif start:
                m = re.match(r2, words)
                if m:
                    title = m.group(1)
                    page = m.group(2)
                    page = page.replace('l', '1')
                    page = int(page.encode(self.default_coding))
                    menus[page] = self.__get_simple_string(title)
                    not_match = 0
                else:
                    not_match += 1
                    if not_match > 10:
                        break

        return menus
Example #16
def filter_services(svcs):
    filtered = []

    # filter includes
    if _args['--has']:
        for sv in svcs:
            for inc in _args['--has']:
                if inc in sv["tags"] and sv not in filtered:
                    filtered.append(sv)

    if _args['--match']:
        for sv in svcs:
            for regex in _args['--match']:
                for tag in sv["tags"]:
                    if re.match(regex, tag) and sv not in filtered:
                        filtered.append(sv)

    if not filtered and not _args['--has'] and not _args['--match']:
        filtered = svcs

    if _args['--has-not']:
        for sv in list(filtered):  # operate on a copy, otherwise .remove would change the list under our feet
            for exc in _args['--has-not']:
                if exc in sv["tags"]:
                    filtered.remove(sv)

    if _args['--no-match']:
        for sv in list(filtered):
            for regex in _args['--no-match']:
                for tag in sv["tags"]:
                    if re.match(regex, tag) and sv in list(filtered):
                        filtered.remove(sv)

    return filtered
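filter_services reads the module-level _args dictionary, which is not shown here; it looks like a docopt-style options dict. A small illustrative call, with made-up data and the assumption that _args is defined in the same module:

# Illustrative only: _args and the service records below are invented.
_args = {'--has': ['web'], '--match': [], '--has-not': ['deprecated'], '--no-match': []}
svcs = [
    {'name': 'api',  'tags': ['web', 'prod']},
    {'name': 'old',  'tags': ['web', 'deprecated']},
    {'name': 'cron', 'tags': ['batch']},
]
print([s['name'] for s in filter_services(svcs)])  # -> ['api'] ('old' is removed by --has-not, 'cron' never matched --has)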
Example #17
 def _get_type_of_macro(self, macros, clss):
     for macro in macros:
         # ARGN Macros
         if re.match('ARG\d', macro):
             macros[macro]['type'] = 'ARGN'
             continue
         # USERN macros
         # are managed in the Config class, so no
         # need to look that here
         elif re.match('_HOST\w', macro):
             macros[macro]['type'] = 'CUSTOM'
             macros[macro]['class'] = 'HOST'
             continue
         elif re.match('_SERVICE\w', macro):
             macros[macro]['type'] = 'CUSTOM'
             macros[macro]['class'] = 'SERVICE'
             # value of macro: re.split('_HOST', '_HOSTMAC_ADDRESS')[1]
             continue
         elif re.match('_CONTACT\w', macro):
             macros[macro]['type'] = 'CUSTOM'
             macros[macro]['class'] = 'CONTACT'
             continue
         # On demand macro
         elif len(macro.split(':')) > 1:
             macros[macro]['type'] = 'ONDEMAND'
             continue
         # OK, classical macro...
         for cls in clss:
             if macro in cls.macros:
                 macros[macro]['type'] = 'class'
                 macros[macro]['class'] = cls
                 continue
Example #18
def _strip_and_unquote( keys, value ):
    if value[:3] == "'''":
        m = re.match( _MULTI_LINE_SINGLE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )

    elif value[:3] == '"""':
        m = re.match( _MULTI_LINE_DOUBLE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )

    elif value[0] == '"':
        m = re.match( _DQ_VALUE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )

    elif value[0] == "'":
        m = re.match( _SQ_VALUE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )
    else:
        # unquoted
        value = re.sub( '\s*#.*$', '', value )

    # Note strip() removes leading and trailing whitespace, including
    # initial newlines on a multiline string:
    return value.strip()
Example #19
def tourAllFiles(dirpath):
    global a
    global alen
    global domain
    global person
    
    # names = list of files in current path
    names = os.listdir(dirpath)
    
    # find 'si' and 'sx' prefix and 'phn' suffix
    # filter out 'sa' prefix
    pat1 = '.*si.*\.phn'
    pat2 = '.*sx.*\.phn'
    drpat = 'dr\d'
    for name in names:
        if re.match(pat1,name) != None or re.match(pat2,name) != None:
            phn2label(name)
        
        curpath = dirpath+'/'+name
        if os.path.isdir(curpath):
            # only use to drx/person/xxx.phn
            if re.match(drpat,name):
                domain = name
            else:
                person = name
            # iterate
            os.chdir(curpath)
            tourAllFiles(curpath)
            os.chdir(dirpath)
Example #20
    def parse(self, response):
        sel = Selector(response)
        result = []
       
        ad = DatesItem()
        ad['name'] = ""
        for p in sel.xpath("//div[@class='poziomd']//text()").extract():

            if re.match("^.*,", p):
                if p.startswith(","):
                    ad['desc'] = p[2:]
                else:
                    ad['desc'] = p[6:]
                ad['name'] = ad['name'].lstrip('1234567890() ').strip()
                if re.match('^.\s', ad['name']):
                    ad['name'] = ad['name'][2:]

                ad['url'] = response.url
                if re.match(".*urodzeni.*", response.url):
                    ad['isBirth'] = True
                else:
                    ad['isBirth'] = False

                result.append(ad)
                ad = DatesItem()
                ad['name'] = ""
            elif re.match("^\s*[0-9]{1,4}", p) and not ad.has_key('date'):
                ad['date'] = re.match("^\s*[0-9]{1,4}", p).group()
            else:
                ad['name'] = ad['name'] + p
        return result
Example #21
def main():
    f = open("makefile2wrappers.txt", "r")
    lins = f.readlines()
    f.close()

    for l in lins:
        l = l.strip()
        if len(l) == 0:
            continue

        print('Line: ' + l)
        # $(C) -DDINT -c ../Source/umf_analyze.c -o umf_i_analyze.o
        defs = re.match(".*\)(.*)-c", l).group(1).strip()
        # If there's no "-o" flag, just compile the file as is:
        if re.search('.*-o.*', l) != None:
            src = re.match(".*-c(.*)-o", l).group(1).strip()
            out = re.match(".*-o(.*)", l).group(1).strip()
            f = 'SourceWrappers/' + out + ".c"
            print(' => Creating ' + f + '\n')
            o = open(f, "w")
            DEFs = defs.strip().split("-D")
            DEFs = [x for x in DEFs if x]  # Remove empty
            for d in DEFs:
                o.write('#define ' + d + '\n')
            o.write('#include <' + src + '>' + '\n')
            o.close()
        else:
            src = re.match(".*-c(.*)", l).group(1).strip()
            f = "SourceWrappers/" + os.path.basename(src)
            print(' => Creating ' + f + '\n')
            o = open(f, "w")
            o.write('#include <' + src + '>' + '\n')
            o.close()

    return 0
Example #22
    def test_various_ops(self):
        # This takes about n/3 seconds to run (about n/3 clumps of tasks,
        # times about 1 second per clump).
        NUMTASKS = 10

        # no more than 3 of the 10 can run at once
        sema = threading.BoundedSemaphore(value=3)
        mutex = threading.RLock()
        numrunning = Counter()

        threads = []

        for i in range(NUMTASKS):
            t = TestThread("<thread %d>" % i, self, sema, mutex, numrunning)
            threads.append(t)
            self.assertEqual(t.ident, None)
            self.assertTrue(re.match("<TestThread\(.*, initial\)>", repr(t)))
            t.start()

        if verbose:
            print("waiting for all tasks to complete")
        for t in threads:
            t.join(NUMTASKS)
            self.assertTrue(not t.is_alive())
            self.assertNotEqual(t.ident, 0)
            self.assertFalse(t.ident is None)
            self.assertTrue(re.match("<TestThread\(.*, stopped -?\d+\)>", repr(t)))
        if verbose:
            print("all tasks done")
        self.assertEqual(numrunning.get(), 0)
Example #23
def parse_requirements(requirements_file='requirements.txt'):
    requirements = []
    with open(requirements_file, 'r') as f:
        for line in f:
            # For the requirements list, we need to inject only the portion
            # after egg= so that distutils knows the package it's looking for
            # such as:
            # -e git://github.com/openstack/nova/master#egg=nova
            if re.match(r'\s*-e\s+', line):
                requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$', r'\1',
                                    line))
            # such as:
            # http://github.com/openstack/nova/zipball/master#egg=nova
            elif re.match(r'\s*https?:', line):
                requirements.append(re.sub(r'\s*https?:.*#egg=(.*)$', r'\1',
                                    line))
            # -f lines are for index locations, and don't get used here
            elif re.match(r'\s*-f\s+', line):
                pass
            # -r lines are for including other files, and don't get used here
            elif re.match(r'\s*-r\s+', line):
                pass
            # argparse is part of the standard library starting with 2.7
            # adding it to the requirements list screws distro installs
            elif line == 'argparse' and sys.version_info >= (2, 7):
                pass
            else:
                requirements.append(line.strip())
    return requirements
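A short usage sketch (the file name and contents are made up): -e and URL entries are reduced to the name given after #egg=, while ordinary entries are kept as-is:

# Illustrative only.
with open('requirements.txt', 'w') as f:
    f.write('-e git://github.com/openstack/nova/master#egg=nova\n')
    f.write('six>=1.9.0\n')
for req in parse_requirements('requirements.txt'):
    print(repr(req))  # -> 'nova\n' (egg name; the substitution keeps the trailing newline), then 'six>=1.9.0'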
Example #24
def parse_template(template_name):
    """Given a template name, attempt to extract its group name and upload date

    Returns:
        * None if no groups matched
        * group_name, datestamp of the first matching group. group name will be a string,
          datestamp will be a :py:class:`datetime.date <python:datetime.date>`, or None if
          a date can't be derived from the template name
    """
    for group_name, regex in stream_matchers:
        matches = re.match(regex, template_name)
        if matches:
            groups = matches.groupdict()
            # hilarity may ensue if this code is run right before the new year
            today = date.today()
            year = int(groups.get('year', today.year))
            month, day = int(groups['month']), int(groups['day'])
            # validate the template date by turning into a date obj
            template_date = futurecheck(date(year, month, day))
            return TemplateInfo(group_name, template_date, True)
    for group_name, regex in generic_matchers:
        matches = re.match(regex, template_name)
        if matches:
            return TemplateInfo(group_name, None, False)
    # If no match, unknown
    return TemplateInfo('unknown', None, False)
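stream_matchers and generic_matchers are defined elsewhere as lists of (group_name, regex) pairs. A hypothetical stream_matchers entry, only to illustrate the named groups (year optional, month, day) that groupdict() is read with above; the group name and pattern are invented:

# Hypothetical matcher: template names such as "rhel-guest-image-7.2-20160302.x86_64"
stream_matchers = [
    ('rhel72', r'^rhel-guest-image-7\.2-(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})'),
]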
Example #25
    def importAuto(cls, string, path=None, activeFit=None, callback=None, encoding=None):
        # Get first line and strip space symbols of it to avoid possible detection errors
        firstLine = re.split("[\n\r]+", string.strip(), maxsplit=1)[0]
        firstLine = firstLine.strip()

        # If XML-style start of tag encountered, detect as XML
        if re.match("<", firstLine):
            if encoding:
                return "XML", cls.importXml(string, callback, encoding)
            else:
                return "XML", cls.importXml(string, callback)

        # If JSON-style start, parse as CREST/JSON
        if firstLine[0] == '{':
            return "JSON", (cls.importCrest(string),)

        # If we've got source file name which is used to describe ship name
        # and first line contains something like [setup name], detect as eft config file
        if re.match("\[.*\]", firstLine) and path is not None:
            filename = os.path.split(path)[1]
            shipName = filename.rsplit('.')[0]
            return "EFT Config", cls.importEftCfg(shipName, string, callback)

        # If no file is specified and there's comma between brackets,
        # consider that we have [ship, setup name] and detect like eft export format
        if re.match("\[.*,.*\]", firstLine):
            return "EFT", (cls.importEft(string),)

        # Use DNA format for all other cases
        return "DNA", (cls.importDna(string),)
Example #26
def parse(fh):
    stats = []
    for line in fh:
        m = re.match(r'TRANSLATION\s+(?P<content>.*)\n', line)
        if not m:
            continue

        line = m.group('content')
        m = re.match(r'(?P<group>[a-zA-Z_@.]+):', line)
        if not m:
            sys.stderr.write('Malformed TRANSLATION line: %s\n' % line)
            continue

        stat = {'group': m.group('group')}

        if stat['group'] == 'total':
            continue
        else:
            sum = 0
            for x in stat_types:
                m = re.search(r'\b(?P<count>\d+) %s (message|translation)' % x,
                              line)
                if m:
                    stat[x] = int(m.group('count'))
                    sum += stat[x]
            stat['total'] = sum
        stats.append(stat)

    return stats
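stat_types is not defined in this snippet; it is presumably a list of per-category labels along the lines of the msgfmt --statistics wording. A small illustrative run under that assumption (the sample lines are invented):

# Assumption: stat_types holds the categories counted per group.
stat_types = ['translated', 'fuzzy', 'untranslated']
sample = [
    'TRANSLATION af.po: 12 translated messages, 3 fuzzy translations, 5 untranslated messages.\n',
    'TRANSLATION total: 20 translated messages.\n',
]
print(parse(sample))
# -> [{'group': 'af.po', 'translated': 12, 'fuzzy': 3, 'untranslated': 5, 'total': 20}]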
Example #27
def check_api_version_decorator(logical_line, previous_logical, blank_before,
                                filename):
    msg = ("N332: the api_version decorator must be the first decorator"
           " on a method.")
    if blank_before == 0 and re.match(api_version_re, logical_line) \
           and re.match(decorator_re, previous_logical):
        yield(0, msg)
Example #28
    def history(self, page):
        GIT_COMMIT_FIELDS = ["commit", "author", "date", "date_relative", "message"]
        GIT_LOG_FORMAT = "%x1f".join(["%h", "%an", "%ad", "%ar", "%s"]) + "%x1e"
        output = git.log("--format=%s" % GIT_LOG_FORMAT, "--follow", "-z", "--shortstat", page.abspath)
        output = output.split("\n")
        history = []
        for line in output:
            if "\x1f" in line:
                log = line.strip("\x1e\x00").split("\x1f")
                history.append(dict(zip(GIT_COMMIT_FIELDS, log)))
            else:
                insertion = re.match(r".* (\d+) insertion", line)
                deletion = re.match(r".* (\d+) deletion", line)
                history[-1]["insertion"] = int(insertion.group(1)) if insertion else 0
                history[-1]["deletion"] = int(deletion.group(1)) if deletion else 0

        max_changes = float(max([(v["insertion"] + v["deletion"]) for v in history])) or 1.0
        for v in history:
            v.update(
                {
                    "insertion_relative": str((v["insertion"] / max_changes) * 100),
                    "deletion_relative": str((v["deletion"] / max_changes) * 100),
                }
            )
        return history
Example #29
def readFile(fileV4, fileV6, trie):
	
	# open ipv4 file
	input = open(fileV4, "r")
	pattern = '(\d+)\,(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/(\d{1,2}).*'
	
	for line in input:
		result = re.match(pattern, line)
		if result:
			address = result.group(2)
			length = result.group(3)
			asn = result.group(1)
			update = True
			withdrawal = False
			count = 0
			insertTrie(trie, address, length, asn, update, withdrawal, count)

	# open ipv6 file
	input = open(fileV6, "r")
	pattern = '(\d+)\,(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))/(\d{1,3}),.*'

	for line in input:
		result = re.match(pattern, line)
		if result:
			address = result.group(2)
			length = result.group(32)
			asn = result.group(1)
			update = True
			withdrawal = False
			count = 0

			insertTrie(trie, address, length, asn, update, withdrawal, count)
	
	return trie
Example #30
def parse_report(path):
    """ Return the volume information contained in the SIENAX report. This
        is a dictionary with keys "grey", "white", and "brain".
        The information for each tissue is a dictionary with the
        normalized and raw values, in cubic millimeters.

        adapted from: http://code.google.com/p/medipy/source/browse/plugins/fsl/sienax.py
        see licence: http://code.google.com/p/medipy/source/browse/LICENSE
    """
    
    report = {}
    
    fd = open(path)
    for line in fd.readlines() :        
        for tissue in ["GREY", "WHITE", "BRAIN"] :
            pattern = tissue + r"\s+([\d+\.]+)\s+([\d+\.]+)"
            measure = re.match(pattern, line)
            if measure :
                normalized = float(measure.group(1))
                raw = float(measure.group(2))
                report[tissue.lower()] = {"normalized" : normalized, "raw" : raw}
                continue
        
        vscale = re.match("VSCALING ([\d\.]+)", line)
        if vscale :
            report["vscale"] = float(vscale.group(1))
    
    return report
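A minimal sketch of the report line format the two regexes above expect (all numbers invented):

# Illustrative only: write a fake SIENAX report and parse it.
sample = ("GREY   680000.00  620000.00\n"
          "WHITE  720000.00  650000.00\n"
          "BRAIN 1400000.00 1270000.00\n"
          "VSCALING 1.103\n")
with open('sienax_report.txt', 'w') as fd:
    fd.write(sample)
print(parse_report('sienax_report.txt'))
# -> {'grey': {...}, 'white': {...}, 'brain': {...}, 'vscale': 1.103}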
Example #31
    def LoadUniFile(self, File = None):
        if File == None:
            EdkLogger.error("Unicode File Parser", PARSER_ERROR, 'No unicode file is given')
        self.File = File
        #
        # Process special char in file
        #
        Lines = self.PreProcess(File)

        #
        # Get Unicode Information
        #
        for IndexI in range(len(Lines)):
            Line = Lines[IndexI]
            if (IndexI + 1) < len(Lines):
                SecondLine = Lines[IndexI + 1]
            if (IndexI + 2) < len(Lines):
                ThirdLine = Lines[IndexI + 2]

            #
            # Get Language def information
            #
            if Line.find(u'#langdef ') >= 0:
                self.GetLangDef(File, Line)
                continue

            Name = ''
            Language = ''
            Value = ''
            #
            # Get string def information format 1 as below
            #
            #     #string MY_STRING_1
            #     #language eng
            #     My first English string line 1
            #     My first English string line 2
            #     #string MY_STRING_1
            #     #language spa
            #     Mi segunda secuencia 1
            #     Mi segunda secuencia 2
            #
            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \
                SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \
                ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0:
                Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
                Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')
                for IndexJ in range(IndexI + 2, len(Lines)):
                    if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0:
                        Value = Value + Lines[IndexJ]
                    else:
                        IndexI = IndexJ
                        break
                # Value = Value.replace(u'\r\n', u'')
                Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
                # Check the string name is the upper character
                if not self.IsCompatibleMode and Name != '':
                    MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE)
                    if MatchString == None or MatchString.end(0) != len(Name):
                        EdkLogger.error('Unicode File Parser', FORMAT_INVALID, 'The string token name %s defined in UNI file %s contains the invalid lower case character.' %(Name, self.File))
                self.AddStringToList(Name, Language, Value)
                continue

            #
            # Get string def information format 2 as below
            #
            #     #string MY_STRING_1     #language eng     "My first English string line 1"
            #                                               "My first English string line 2"
            #                             #language spa     "Mi segunda secuencia 1"
            #                                               "Mi segunda secuencia 2"
            #     #string MY_STRING_2     #language eng     "My first English string line 1"
            #                                               "My first English string line 2"
            #     #string MY_STRING_2     #language spa     "Mi segunda secuencia 1"
            #                                               "Mi segunda secuencia 2"
            #
            if Line.find(u'#string ') >= 0 and Line.find(u'#language ') >= 0:
                StringItem = Line
                for IndexJ in range(IndexI + 1, len(Lines)):
                    if Lines[IndexJ].find(u'#string ') >= 0 and Lines[IndexJ].find(u'#language ') >= 0:
                        IndexI = IndexJ
                        break
                    elif Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') >= 0:
                        StringItem = StringItem + Lines[IndexJ]
                    elif Lines[IndexJ].count(u'\"') >= 2:
                        StringItem = StringItem[ : StringItem.rfind(u'\"')] + Lines[IndexJ][Lines[IndexJ].find(u'\"') + len(u'\"') : ]
                self.GetStringObject(StringItem)
                continue
Example #32
def _extend(filename, n, keys=()):
    """
    For internal use only. Extend a file.

    :param file: str
    :param n: int
    :param keys: tuple
    :return: str, set
    """

    with open(filename, 'r') as file:
        header = file.readline()
        reader = csv.reader(file)
        lines = [_ for _ in reader]

    fname = f"{filename}_{n}.csv"
    with open(fname, 'w') as file:
        file.write(header)
        for line in lines:
            file.write(','.join(line) + '\n')
        # file.writelines([','.join(x) for x in lines])
        # file.write('\n')

        if not keys:
            these_keys = set([line[0].strip() for line in lines])
        else:
            these_keys = set()
            n = n // 5

        for i in range(n):
            for line in lines:
                mod_words = line[:]

                if keys:  # Use provided users and products
                    uid = random.choice(keys[0])
                    pid = random.choice(keys[1])

                    counter = 0
                    while (uid, pid) in these_keys:
                        uid = random.choice(keys[0])
                        pid = random.choice(keys[1])
                        counter += 1  # bound the number of retries
                        if counter > 100:
                            break

                    if (uid, pid) in these_keys:
                        continue

                    file.write(f"{uid}, {pid}, {random.randint(1, int(mod_words[-1].strip()) * 2)}\n")
                else:
                    mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))])
                    while mod_key.strip() in these_keys:
                        mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))])
                    these_keys.add(mod_key)
                    mod_words[0] = mod_key

                    for j, word in enumerate(line[1:], 1):
                        # If a phone number, randomize digits
                        if re.match(r"\d{3}-\d{3}-\d{4}", word.strip()):
                            num = f"{random.randint(0, 9999999999):09d}"
                            mod_words[j] = num[:3] + '-' + num[3:6] + '-' + num[-4:]
                        # If a number, randomize
                        elif re.fullmatch(r"\d*", word.strip()):
                            num = random.randint(1, int(word.strip()) * 2)
                            mod_words[j] = str(num)
                        else:  # Replace 1/2 of characters with random digits
                            mod_locs = [random.randint(0, len(word) - 1) for _ in range(len(word) // 2)]
                            lst = list(word)
                            for loc in mod_locs:
                                lst[loc] = random.choice(string.ascii_letters)
                            mod_words[j] = ''.join(lst)

                    file.write(','.join(mod_words) + '\n')
            # file.writelines([]) for line in lines])

    return fname, these_keys
Example #33
def _read_cells(f, line):
    # If the line is self-contained, it is merely a declaration of the total
    # number of points.
    if line.count('(') == line.count(')'):
        return None, None

    out = re.match('\\s*\\(\\s*(|20|30)12\\s*\\(([^\\)]+)\\).*', line)
    a = [int(num, 16) for num in out.group(2).split()]
    assert len(a) > 4
    first_index = a[1]
    last_index = a[2]
    num_cells = last_index - first_index + 1
    element_type = a[4]

    element_type_to_key_num_nodes = {
        0: ('mixed', None),
        1: ('triangle', 3),
        2: ('tetra', 4),
        3: ('quad', 4),
        4: ('hexahedron', 8),
        5: ('pyra', 5),
        6: ('wedge', 6),
    }

    key, num_nodes_per_cell = \
        element_type_to_key_num_nodes[element_type]

    # Skip to the opening `(` and make sure that there's no non-whitespace
    # character between the last closing bracket and the `(`.
    if line.strip()[-1] != '(':
        c = None
        while True:
            c = f.read(1).decode('utf-8')
            if c == '(':
                break
            if not re.match('\\s', c):
                # Found a non-whitespace character before `(`.
                # Assume this is just a declaration line then and
                # skip to the closing bracket.
                _skip_to(f, ')')
                return None, None

    assert key != 'mixed'

    # read cell data
    if out.group(1) == '':
        # ASCII cells
        data = numpy.empty((num_cells, num_nodes_per_cell), dtype=int)
        for k in range(num_cells):
            line = f.readline().decode('utf-8')
            dat = line.split()
            assert len(dat) == num_nodes_per_cell
            data[k] = [int(d, 16) for d in dat]
    else:
        # binary cells
        if out.group(1) == '20':
            bytes_per_item = 4
            dtype = numpy.int32
        else:
            assert out.group(1) == '30'
            bytes_per_item = 8
            dtype = numpy.int64
        total_bytes = \
            bytes_per_item * num_nodes_per_cell * num_cells
        data = numpy.fromstring(f.read(total_bytes),
                                count=(num_nodes_per_cell * num_cells),
                                dtype=dtype).reshape(
                                    (num_cells, num_nodes_per_cell))

    # make sure that the data set is properly closed
    _skip_close(f, 2)
    return key, data
Example #34
def read(filename):
    # Initialize the data optional data fields
    field_data = {}
    cell_data = {}
    point_data = {}

    points = []
    cells = {}

    first_point_index_overall = None
    last_point_index = None

    # read file in binary mode since some data might be binary
    with open(filename, 'rb') as f:
        while True:
            line = f.readline().decode('utf-8')
            if not line:
                break

            if line.strip() == '':
                continue

            # expect the line to have the form
            #  (<index> [...]
            out = re.match('\\s*\\(\\s*([0-9]+).*', line)
            assert out
            index = out.group(1)

            if index == '0':
                # Comment.
                _skip_close(f, line.count('(') - line.count(')'))
            elif index == '1':
                # header
                # (1 "<text>")
                _skip_close(f, line.count('(') - line.count(')'))
            elif index == '2':
                # dimensionality
                # (2 3)
                _skip_close(f, line.count('(') - line.count(')'))
            elif re.match('(|20|30)10', index):
                # points
                pts, first_point_index_overall, last_point_index = \
                        _read_points(
                                f, line, first_point_index_overall,
                                last_point_index
                                )

                if pts is not None:
                    points.append(pts)

            elif re.match('(|20|30)12', index):
                # cells
                # (2012 (zone-id first-index last-index type element-type))
                key, data = _read_cells(f, line)
                if data is not None:
                    cells[key] = data

            elif re.match('(|20|30)13', index):
                data = _read_faces(f, line)

                for key in data:
                    if key in cells:
                        cells[key] = numpy.concatenate([cells[key], data[key]])
                    else:
                        cells[key] = data[key]

            elif index == '39':
                logging.warning(
                    'Zone specification not supported yet. Skipping.')
                _skip_close(f, line.count('(') - line.count(')'))

            elif index == '45':
                # (45 (2 fluid solid)())
                obj = re.match('\\(45 \\([0-9]+ ([\\S]+) ([\\S]+)\\)\\(\\)\\)',
                               line)
                if obj:
                    logging.warning(
                        'Zone specification not supported yet (%r, %r). '
                        'Skipping.', obj.group(1), obj.group(2))
                else:
                    logging.warning('Zone specification not supported yet.')

            else:
                logging.warning('Unknown index %r. Skipping.', index)
                # Skipping ahead to the next line with two closing brackets.
                _skip_close(f, line.count('(') - line.count(')'))

    points = numpy.concatenate(points)

    # Gauge the cells with the first point_index.
    for key in cells:
        cells[key] -= first_point_index_overall

    return points, cells, point_data, cell_data, field_data
Example #35
def _read_faces(f, line):
    # faces
    # (13 (zone-id first-index last-index type element-type))

    # If the line is self-contained, it is merely a declaration of
    # the total number of points.
    if line.count('(') == line.count(')'):
        return {}

    out = re.match('\\s*\\(\\s*(|20|30)13\\s*\\(([^\\)]+)\\).*', line)
    a = [int(num, 16) for num in out.group(2).split()]

    assert len(a) > 4
    first_index = a[1]
    last_index = a[2]
    num_cells = last_index - first_index + 1
    element_type = a[4]

    element_type_to_key_num_nodes = {
        0: ('mixed', None),
        2: ('line', 2),
        3: ('triangle', 3),
        4: ('quad', 4)
    }

    key, num_nodes_per_cell = \
        element_type_to_key_num_nodes[element_type]

    # Skip ahead to the line that opens the data block (might be
    # the current line already).
    if line.strip()[-1] != '(':
        _skip_to(f, '(')

    data = {}
    if out.group(1) == '':
        # ASCII
        if key == 'mixed':
            # From
            # <http://www.afs.enea.it/fluent/Public/Fluent-Doc/PDF/chp03.pdf>:
            # > If the face zone is of mixed type (element-type =
            # > 0), the body of the section will include the face
            # > type and will appear as follows
            # >
            # > type v0 v1 v2 c0 c1
            # >
            for k in range(num_cells):
                line = ''
                while line.strip() == '':
                    line = f.readline().decode('utf-8')
                dat = line.split()
                type_index = int(dat[0], 16)
                assert type_index != 0
                type_string, num_nodes_per_cell = \
                    element_type_to_key_num_nodes[type_index]
                assert len(dat) == num_nodes_per_cell + 3

                if type_string not in data:
                    data[type_string] = []

                data[type_string].append(
                    [int(d, 16) for d in dat[1:num_nodes_per_cell + 1]])

            data = {key: numpy.array(data[key]) for key in data}

        else:
            # read cell data
            data = numpy.empty((num_cells, num_nodes_per_cell), dtype=int)
            for k in range(num_cells):
                line = f.readline().decode('utf-8')
                dat = line.split()
                # The body of a regular face section contains the
                # grid connectivity, and each line appears as
                # follows:
                #   n0 n1 n2 cr cl
                # where n* are the defining nodes (vertices) of the
                # face, and c* are the adjacent cells.
                assert len(dat) == num_nodes_per_cell + 2
                data[k] = [int(d, 16) for d in dat[:num_nodes_per_cell]]
            data = {key: data}
    else:
        # binary
        if out.group(1) == '20':
            bytes_per_item = 4
            dtype = numpy.int32
        else:
            assert out.group(1) == '30'
            bytes_per_item = 8
            dtype = numpy.int64

        assert key != 'mixed'

        # Read cell data.
        # The body of a regular face section contains the grid
        # connectivity, and each line appears as follows:
        #   n0 n1 n2 cr cl
        # where n* are the defining nodes (vertices) of the face,
        # and c* are the adjacent cells.
        total_bytes = \
            num_cells * bytes_per_item * (num_nodes_per_cell + 2)
        data = numpy.fromstring(f.read(total_bytes), dtype=dtype).reshape(
            (num_cells, num_nodes_per_cell + 2))
        # Cut off the adjacent cell data.
        data = data[:, :num_nodes_per_cell]
        data = {key: data}

    # make sure that the data set is properly closed
    _skip_close(f, 2)

    return data
Example #36
def HDF5_ATL11_corr_write(IS2_atl11_corr, IS2_atl11_attrs, INPUT=None,
    FILENAME='', FILL_VALUE=None, DIMENSIONS=None, CROSSOVERS=False,
    CLOBBER=False):
    # setting HDF5 clobber attribute
    if CLOBBER:
        clobber = 'w'
    else:
        clobber = 'w-'

    # open output HDF5 file
    fileID = h5py.File(os.path.expanduser(FILENAME), clobber)

    # create HDF5 records
    h5 = {}

    # number of GPS seconds between the GPS epoch (1980-01-06T00:00:00Z UTC)
    # and ATLAS Standard Data Product (SDP) epoch (2018-01-01T00:00:00Z UTC)
    h5['ancillary_data'] = {}
    for k,v in IS2_atl11_corr['ancillary_data'].items():
        # Defining the HDF5 dataset variables
        val = 'ancillary_data/{0}'.format(k)
        h5['ancillary_data'][k] = fileID.create_dataset(val, np.shape(v), data=v,
            dtype=v.dtype, compression='gzip')
        # add HDF5 variable attributes
        for att_name,att_val in IS2_atl11_attrs['ancillary_data'][k].items():
            h5['ancillary_data'][k].attrs[att_name] = att_val

    # write each output beam pair
    pairs = [k for k in IS2_atl11_corr.keys() if bool(re.match(r'pt\d',k))]
    for ptx in pairs:
        fileID.create_group(ptx)
        h5[ptx] = {}
        # add HDF5 group attributes for beam
        for att_name in ['description','beam_pair','ReferenceGroundTrack',
            'first_cycle','last_cycle','equatorial_radius','polar_radius']:
            fileID[ptx].attrs[att_name] = IS2_atl11_attrs[ptx][att_name]

        # ref_pt, cycle number, geolocation and delta_time variables
        for k in ['ref_pt','cycle_number','delta_time','latitude','longitude']:
            # values and attributes
            v = IS2_atl11_corr[ptx][k]
            attrs = IS2_atl11_attrs[ptx][k]
            fillvalue = FILL_VALUE[ptx][k]
            # Defining the HDF5 dataset variables
            val = '{0}/{1}'.format(ptx,k)
            if fillvalue:
                h5[ptx][k] = fileID.create_dataset(val, np.shape(v), data=v,
                    dtype=v.dtype, fillvalue=fillvalue, compression='gzip')
            else:
                h5[ptx][k] = fileID.create_dataset(val, np.shape(v), data=v,
                    dtype=v.dtype, compression='gzip')
            # create or attach dimensions for HDF5 variable
            if DIMENSIONS[ptx][k]:
                # attach dimensions
                for i,dim in enumerate(DIMENSIONS[ptx][k]):
                    h5[ptx][k].dims[i].attach_scale(h5[ptx][dim])
            else:
                # make dimension
                h5[ptx][k].make_scale(k)
            # add HDF5 variable attributes
            for att_name,att_val in attrs.items():
                h5[ptx][k].attrs[att_name] = att_val

        # add to cycle_stats variables
        groups = ['cycle_stats']
        # if running crossovers: add to crossing_track_data variables
        if CROSSOVERS:
            groups.append('crossing_track_data')
        for key in groups:
            fileID[ptx].create_group(key)
            h5[ptx][key] = {}
            for att_name in ['Description','data_rate']:
                att_val=IS2_atl11_attrs[ptx][key][att_name]
                fileID[ptx][key].attrs[att_name] = att_val
            for k,v in IS2_atl11_corr[ptx][key].items():
                # attributes
                attrs = IS2_atl11_attrs[ptx][key][k]
                fillvalue = FILL_VALUE[ptx][key][k]
                # Defining the HDF5 dataset variables
                val = '{0}/{1}/{2}'.format(ptx,key,k)
                if fillvalue:
                    h5[ptx][key][k] = fileID.create_dataset(val, np.shape(v), data=v,
                        dtype=v.dtype, fillvalue=fillvalue, compression='gzip')
                else:
                    h5[ptx][key][k] = fileID.create_dataset(val, np.shape(v), data=v,
                        dtype=v.dtype, compression='gzip')
                # create or attach dimensions for HDF5 variable
                if DIMENSIONS[ptx][key][k]:
                    # attach dimensions
                    for i,dim in enumerate(DIMENSIONS[ptx][key][k]):
                        if (key == 'cycle_stats'):
                            h5[ptx][key][k].dims[i].attach_scale(h5[ptx][dim])
                        else:
                            h5[ptx][key][k].dims[i].attach_scale(h5[ptx][key][dim])
                else:
                    # make dimension
                    h5[ptx][key][k].make_scale(k)
                # add HDF5 variable attributes
                for att_name,att_val in attrs.items():
                    h5[ptx][key][k].attrs[att_name] = att_val

    # HDF5 file title
    fileID.attrs['featureType'] = 'trajectory'
    fileID.attrs['title'] = 'ATLAS/ICESat-2 Annual Land Ice Height'
    fileID.attrs['summary'] = ('The purpose of ATL11 is to provide an ICESat-2 '
        'satellite cycle summary of heights and height changes of land-based '
        'ice and will be provided as input to ATL15 and ATL16, gridded '
        'estimates of heights and height-changes.')
    fileID.attrs['description'] = ('Land ice parameters for each beam pair. '
        'All parameters are calculated for the same along-track increments '
        'for each beam pair and repeat.')
    date_created = datetime.datetime.today()
    fileID.attrs['date_created'] = date_created.isoformat()
    project = 'ICESat-2 > Ice, Cloud, and land Elevation Satellite-2'
    fileID.attrs['project'] = project
    platform = 'ICESat-2 > Ice, Cloud, and land Elevation Satellite-2'
    fileID.attrs['platform'] = platform
    # add attribute for elevation instrument and designated processing level
    instrument = 'ATLAS > Advanced Topographic Laser Altimeter System'
    fileID.attrs['instrument'] = instrument
    fileID.attrs['source'] = 'Spacecraft'
    fileID.attrs['references'] = 'https://nsidc.org/data/icesat-2'
    fileID.attrs['processing_level'] = '4'
    # add attributes for input ATL11 files
    fileID.attrs['input_files'] = os.path.basename(INPUT)
    # find geospatial and temporal ranges
    lnmn,lnmx,ltmn,ltmx,tmn,tmx = (np.inf,-np.inf,np.inf,-np.inf,np.inf,-np.inf)
    for ptx in pairs:
        lon = IS2_atl11_corr[ptx]['longitude']
        lat = IS2_atl11_corr[ptx]['latitude']
        delta_time = IS2_atl11_corr[ptx]['delta_time']
        valid = np.nonzero(delta_time != FILL_VALUE[ptx]['delta_time'])
        # setting the geospatial and temporal ranges
        lnmn = lon.min() if (lon.min() < lnmn) else lnmn
        lnmx = lon.max() if (lon.max() > lnmx) else lnmx
        ltmn = lat.min() if (lat.min() < ltmn) else ltmn
        ltmx = lat.max() if (lat.max() > ltmx) else ltmx
        tmn = delta_time[valid].min() if (delta_time[valid].min() < tmn) else tmn
        tmx = delta_time[valid].max() if (delta_time[valid].max() > tmx) else tmx
    # add geospatial and temporal attributes
    fileID.attrs['geospatial_lat_min'] = ltmn
    fileID.attrs['geospatial_lat_max'] = ltmx
    fileID.attrs['geospatial_lon_min'] = lnmn
    fileID.attrs['geospatial_lon_max'] = lnmx
    fileID.attrs['geospatial_lat_units'] = "degrees_north"
    fileID.attrs['geospatial_lon_units'] = "degrees_east"
    fileID.attrs['geospatial_ellipsoid'] = "WGS84"
    fileID.attrs['date_type'] = 'UTC'
    fileID.attrs['time_type'] = 'CCSDS UTC-A'
    # convert start and end time from ATLAS SDP seconds into Julian days
    JD = convert_delta_time(np.array([tmn,tmx]))['julian']
    # convert to calendar date
    YY,MM,DD,HH,MN,SS = SMBcorr.time.convert_julian(JD,FORMAT='tuple')
    # add attributes with measurement date start, end and duration
    tcs = datetime.datetime(int(YY[0]), int(MM[0]), int(DD[0]),
        int(HH[0]), int(MN[0]), int(SS[0]), int(1e6*(SS[0] % 1)))
    fileID.attrs['time_coverage_start'] = tcs.isoformat()
    tce = datetime.datetime(int(YY[1]), int(MM[1]), int(DD[1]),
        int(HH[1]), int(MN[1]), int(SS[1]), int(1e6*(SS[1] % 1)))
    fileID.attrs['time_coverage_end'] = tce.isoformat()
    fileID.attrs['time_coverage_duration'] = '{0:0.0f}'.format(tmx-tmn)
    # Closing the HDF5 file
    fileID.close()
Example #37
def interp_SMB_ICESat2(base_dir, FILE, model_version, CROSSOVERS=False,
    GZIP=False, VERBOSE=False, MODE=0o775):

    # read data from input file
    print('{0} -->'.format(os.path.basename(FILE))) if VERBOSE else None
    # Open the HDF5 file for reading
    fileID = h5py.File(FILE, 'r')
    # output data directory
    ddir = os.path.dirname(FILE)
    # extract parameters from ICESat-2 ATLAS HDF5 file name
    rx = re.compile(r'(processed_)?(ATL\d{2})_(\d{4})(\d{2})_(\d{2})(\d{2})_'
        r'(\d{3})_(\d{2})(.*?).h5$')
    SUB,PRD,TRK,GRAN,SCYC,ECYC,RL,VERS,AUX = rx.findall(FILE).pop()
    # get projection and region name based on granule
    REGION,proj4_params = set_projection(GRAN)
    # determine main model group from region and model_version
    MODEL, = [key for key,val in models[REGION].items() if model_version in val]

    # keyword arguments for all models
    KWARGS = dict(SIGMA=1.5, FILL_VALUE=np.nan)
    # set model specific parameters
    if (MODEL == 'MAR'):
        match_object=re.match(r'(MARv\d+\.\d+(\.\d+)?)',model_version)
        MAR_VERSION=match_object.group(0)
        MAR_REGION=dict(GL='Greenland',AA='Antarctic')[REGION]
        # model subdirectories
        SUBDIRECTORY=dict(AA={}, GL={})
        SUBDIRECTORY['GL']['MARv3.9-ERA']=['ERA_1958-2018_10km','daily_10km']
        SUBDIRECTORY['GL']['MARv3.10-ERA']=['ERA_1958-2019-15km','daily_15km']
        SUBDIRECTORY['GL']['MARv3.11-NCEP']=['NCEP1_1948-2020_20km','daily_20km']
        SUBDIRECTORY['GL']['MARv3.11-ERA']=['ERA_1958-2019-15km','daily_15km']
        SUBDIRECTORY['GL']['MARv3.11.2-ERA-6km']=['6km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.2-ERA-7.5km']=['7.5km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.2-ERA-10km']=['10km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.2-ERA-15km']=['15km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.2-ERA-20km']=['20km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.2-NCEP-20km']=['20km_NCEP1']
        SUBDIRECTORY['GL']['MARv3.11.5-ERA-6km']=['6km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.5-ERA-10km']=['10km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.5-ERA-15km']=['15km_ERA5']
        SUBDIRECTORY['GL']['MARv3.11.5-ERA-20km']=['20km_ERA5']
        MAR_MODEL=SUBDIRECTORY[REGION][model_version]
        DIRECTORY=os.path.join(base_dir,'MAR',MAR_VERSION,MAR_REGION,*MAR_MODEL)
        # keyword arguments for variable coordinates
        MAR_KWARGS=dict(AA={}, GL={})
        MAR_KWARGS['GL']['MARv3.9-ERA'] = dict(XNAME='X10_153',YNAME='Y21_288')
        MAR_KWARGS['GL']['MARv3.10-ERA'] = dict(XNAME='X10_105',YNAME='Y21_199')
        MAR_KWARGS['GL']['MARv3.11-NCEP'] = dict(XNAME='X12_84',YNAME='Y21_155')
        MAR_KWARGS['GL']['MARv3.11-ERA'] = dict(XNAME='X10_105',YNAME='Y21_199')
        MAR_KWARGS['GL']['MARv3.11.2-ERA-6km'] = dict(XNAME='X12_251',YNAME='Y20_465')
        MAR_KWARGS['GL']['MARv3.11.2-ERA-7.5km'] = dict(XNAME='X12_203',YNAME='Y20_377')
        MAR_KWARGS['GL']['MARv3.11.2-ERA-10km'] = dict(XNAME='X10_153',YNAME='Y21_288')
        MAR_KWARGS['GL']['MARv3.11.2-ERA-15km'] = dict(XNAME='X10_105',YNAME='Y21_199')
        MAR_KWARGS['GL']['MARv3.11.2-ERA-20km'] = dict(XNAME='X12_84',YNAME='Y21_155')
        MAR_KWARGS['GL']['MARv3.11.2-NCEP-20km'] = dict(XNAME='X12_84',YNAME='Y21_155')
        MAR_KWARGS['GL']['MARv3.11.5-ERA-6km'] = dict(XNAME='X12_251',YNAME='Y20_465')
        MAR_KWARGS['GL']['MARv3.11.5-ERA-10km'] = dict(XNAME='X10_153',YNAME='Y21_288')
        MAR_KWARGS['GL']['MARv3.11.5-ERA-15km'] = dict(XNAME='X10_105',YNAME='Y21_199')
        MAR_KWARGS['GL']['MARv3.11.5-ERA-20km'] = dict(XNAME='X12_84',YNAME='Y21_155')
        KWARGS.update(MAR_KWARGS[REGION][model_version])
        # netCDF4 variable names for direct fields
        VARIABLES = ['SMB','ZN6','ZN4','ZN5']
        # output variable keys for both direct and derived fields
        KEYS = ['SMB','zsurf','zfirn','zmelt','zsmb','zaccum']
        # HDF5 longname and description attributes for each variable
        LONGNAME = {}
        LONGNAME['SMB'] = "Cumulative SMB"
        LONGNAME['zsurf'] = "Height"
        LONGNAME['zfirn'] = "Compaction"
        LONGNAME['zmelt'] = "Surface Melt"
        LONGNAME['zsmb'] = "Surface Mass Balance"
        LONGNAME['zaccum'] = "Surface Accumulation"
        DESCRIPTION = {}
        DESCRIPTION['SMB'] = "Cumulative Surface Mass Balance"
        DESCRIPTION['zsurf'] = "Snow Height Change"
        DESCRIPTION['zfirn'] = "Snow Height Change due to Compaction"
        DESCRIPTION['zmelt'] = "Snow Height Change due to Surface Melt"
        DESCRIPTION['zsmb'] = "Snow Height Change due to Surface Mass Balance"
        DESCRIPTION['zaccum'] = "Snow Height Change due to Surface Accumulation"
    elif (MODEL == 'RACMO'):
        RACMO_VERSION,RACMO_MODEL=model_version.split('-')
        # netCDF4 variable names
        VARIABLES = ['hgtsrf']
        # output variable keys
        KEYS = ['zsurf']
        # HDF5 longname attributes for each variable
        LONGNAME = {}
        LONGNAME['zsurf'] = "Height"
        DESCRIPTION = {}
        DESCRIPTION['zsurf'] = "Snow Height Change"
    elif (MODEL == 'MERRA2-hybrid'):
        # regular expression pattern for extracting version
        merra2_regex = re.compile(r'GSFC-fdm-((v\d+)(\.\d+)?)$')
        # get MERRA-2 version and major version
        MERRA2_VERSION = merra2_regex.match(model_version).group(1)
        # MERRA-2 hybrid directory
        DIRECTORY=os.path.join(base_dir,'MERRA2_hybrid',MERRA2_VERSION)
        # MERRA-2 region name from ATL11 region
        MERRA2_REGION = dict(AA='ais',GL='gris')[REGION]
        # keyword arguments for MERRA-2 interpolation programs
        if MERRA2_VERSION in ('v0','v1','v1.0'):
            KWARGS['VERSION'] = merra2_regex.match(model_version).group(2)
            # netCDF4 variable names
            VARIABLES = ['FAC','cum_smb_anomaly','height']
            # add additional Greenland variables
            if (MERRA2_REGION == 'gris'):
                VARIABLES.append('runoff_anomaly')
        else:
            KWARGS['VERSION'] = MERRA2_VERSION.replace('.','_')
            # netCDF4 variable names
            VARIABLES = ['FAC','SMB_a','h_a']
            # add additional Greenland variables
            if (MERRA2_REGION == 'gris'):
                VARIABLES.append('Me_a')
        # use compressed files
        KWARGS['GZIP'] = GZIP
        # output variable keys
        KEYS = ['zsurf','zfirn','zsmb','zmelt']
        # HDF5 longname and description attributes for each variable
        LONGNAME = {}
        LONGNAME['zsurf'] = "Height"
        LONGNAME['zfirn'] = "Compaction"
        LONGNAME['zsmb'] = "Surface Mass Balance"
        LONGNAME['zmelt'] = "Surface Melt"
        DESCRIPTION = {}
        DESCRIPTION['zsurf'] = "Snow Height Change"
        DESCRIPTION['zfirn'] = "Snow Height Change due to Compaction"
        DESCRIPTION['zsmb'] = "Snow Height Change due to Surface Mass Balance"
        DESCRIPTION['zmelt'] = "Snow Height Change due to Surface Melt"

    # pyproj transformer for converting from latitude/longitude
    # into polar stereographic coordinates
    crs1 = pyproj.CRS.from_string("epsg:{0:d}".format(4326))
    crs2 = pyproj.CRS.from_string(proj4_params)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)

    # read each input beam pair within the file
    IS2_atl11_pairs = []
    for ptx in [k for k in fileID.keys() if bool(re.match(r'pt\d',k))]:
        # check if subsetted beam contains reference points
        try:
            fileID[ptx]['ref_pt']
        except KeyError:
            pass
        else:
            IS2_atl11_pairs.append(ptx)

    # copy variables for outputting to HDF5 file
    IS2_atl11_corr = {}
    IS2_atl11_fill = {}
    IS2_atl11_dims = {}
    IS2_atl11_corr_attrs = {}
    # number of GPS seconds between the GPS epoch (1980-01-06T00:00:00Z UTC)
    # and ATLAS Standard Data Product (SDP) epoch (2018-01-01T00:00:00Z UTC)
    # Add this value to delta time parameters to compute full gps_seconds
    IS2_atl11_corr['ancillary_data'] = {}
    IS2_atl11_corr_attrs['ancillary_data'] = {}
    for key in ['atlas_sdp_gps_epoch']:
        # get each HDF5 variable
        IS2_atl11_corr['ancillary_data'][key] = fileID['ancillary_data'][key][:]
        # Getting attributes of group and included variables
        IS2_atl11_corr_attrs['ancillary_data'][key] = {}
        for att_name,att_val in fileID['ancillary_data'][key].attrs.items():
            IS2_atl11_corr_attrs['ancillary_data'][key][att_name] = att_val
    # HDF5 group name for across-track data
    XT = 'crossing_track_data'

    # for each input beam pair within the file
    for ptx in sorted(IS2_atl11_pairs):
        # output data dictionaries for beam
        IS2_atl11_corr[ptx] = dict(cycle_stats=collections.OrderedDict(),
            crossing_track_data=collections.OrderedDict())
        IS2_atl11_fill[ptx] = dict(cycle_stats={},crossing_track_data={})
        IS2_atl11_dims[ptx] = dict(cycle_stats={},crossing_track_data={})
        IS2_atl11_corr_attrs[ptx] = dict(cycle_stats={},crossing_track_data={})

        # extract along-track and across-track variables
        ref_pt = {}
        latitude = {}
        longitude = {}
        delta_time = {}
        groups = ['AT']
        # dictionary with output variables
        OUTPUT = {}
        # number of average segments and number of included cycles
        # fill_value for invalid heights and corrections
        fv = fileID[ptx]['h_corr'].attrs['_FillValue']
        # shape of along-track data
        n_points,n_cycles = fileID[ptx]['delta_time'][:].shape
        # along-track (AT) reference point, latitude, longitude and time
        ref_pt['AT'] = fileID[ptx]['ref_pt'][:].copy()
        latitude['AT'] = np.ma.array(fileID[ptx]['latitude'][:],
            fill_value=fileID[ptx]['latitude'].attrs['_FillValue'])
        latitude['AT'].mask = (latitude['AT'] == latitude['AT'].fill_value)
        longitude['AT'] = np.ma.array(fileID[ptx]['longitude'][:],
            fill_value=fileID[ptx]['longitude'].attrs['_FillValue'])
        longitude['AT'].mask = (longitude['AT'] == longitude['AT'].fill_value)
        delta_time['AT'] = np.ma.array(fileID[ptx]['delta_time'][:],
            fill_value=fileID[ptx]['delta_time'].attrs['_FillValue'])
        delta_time['AT'].mask = (delta_time['AT'] == delta_time['AT'].fill_value)
        # allocate for output height for along-track data
        OUTPUT['AT'] = {}
        for key in KEYS:
            OUTPUT['AT'][key] = np.ma.empty((n_points,n_cycles),fill_value=fv)
            OUTPUT['AT'][key].mask = np.ones((n_points,n_cycles),dtype=bool)
            OUTPUT['AT'][key].interpolation = np.zeros((n_points,n_cycles),dtype=np.uint8)
        # if running ATL11 crossovers
        if CROSSOVERS:
            # add to group
            groups.append('XT')
            # shape of across-track data
            n_cross, = fileID[ptx][XT]['delta_time'].shape
            # across-track (XT) reference point, latitude, longitude and time
            ref_pt['XT'] = fileID[ptx][XT]['ref_pt'][:].copy()
            latitude['XT'] = np.ma.array(fileID[ptx][XT]['latitude'][:],
                fill_value=fileID[ptx][XT]['latitude'].attrs['_FillValue'])
            latitude['XT'].mask = (latitude['XT'] == latitude['XT'].fill_value)
            longitude['XT'] = np.ma.array(fileID[ptx][XT]['longitude'][:],
                fill_value=fileID[ptx][XT]['longitude'].attrs['_FillValue'])
            longitude['XT'].mask = (longitude['XT'] == longitude['XT'].fill_value)
            delta_time['XT'] = np.ma.array(fileID[ptx][XT]['delta_time'][:],
                fill_value=fileID[ptx][XT]['delta_time'].attrs['_FillValue'])
            delta_time['XT'].mask = (delta_time['XT'] == delta_time['XT'].fill_value)
            # allocate for output height for across-track data
            OUTPUT['XT'] = {}
            for key in KEYS:
                OUTPUT['XT'][key] = np.ma.empty((n_cross),fill_value=fv)
                OUTPUT['XT'][key].mask = np.ones((n_cross),dtype=bool)
                OUTPUT['XT'][key].interpolation = np.zeros((n_cross),dtype=np.uint8)

        # extract lat/lon and convert to polar stereographic
        X,Y = transformer.transform(longitude['AT'], latitude['AT'])

        # for each valid cycle of ICESat-2 ATL11 data
        for c in range(n_cycles):
            # find valid elevations for cycle
            valid = np.logical_not(delta_time['AT'].mask[:,c])
            i, = np.nonzero(valid)
            # convert time from ATLAS SDP to date in decimal-years
            tdec = convert_delta_time(delta_time['AT'][i,c])['decimal']
            if (MODEL == 'MAR') and np.any(valid):
                # read and interpolate daily MAR outputs
                for key,var in zip(KEYS,VARIABLES):
                    OUT = SMBcorr.interpolate_mar_daily(DIRECTORY, proj4_params,
                        MAR_VERSION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS)
                    # set attributes to output for iteration
                    OUTPUT['AT'][key].data[i,c] = np.copy(OUT.data)
                    OUTPUT['AT'][key].mask[i,c] = np.copy(OUT.mask)
                    OUTPUT['AT'][key].interpolation[i,c] = np.copy(OUT.interpolation)
                # calculate derived fields
                OUTPUT['AT']['zsmb'].data[i,c] = OUTPUT['AT']['zsurf'].data[i,c] - \
                    OUTPUT['AT']['zfirn'].data[i,c]
                OUTPUT['AT']['zsmb'].mask[i,c] = OUTPUT['AT']['zsurf'].mask[i,c] | \
                    OUTPUT['AT']['zfirn'].mask[i,c]
                OUTPUT['AT']['zaccum'].data[i,c] = OUTPUT['AT']['zsurf'].data[i,c] - \
                    OUTPUT['AT']['zfirn'].data[i,c] - OUTPUT['AT']['zmelt'].data[i,c]
                OUTPUT['AT']['zaccum'].mask[i,c] = OUTPUT['AT']['zsurf'].mask[i,c] | \
                    OUTPUT['AT']['zfirn'].mask[i,c] | OUTPUT['AT']['zmelt'].mask[i,c]
            elif (MODEL == 'RACMO') and np.any(valid):
                # read and interpolate daily RACMO outputs
                for key,var in zip(KEYS,VARIABLES):
                    OUT = SMBcorr.interpolate_racmo_daily(base_dir, proj4_params,
                        RACMO_MODEL, tdec, X[i], Y[i], VARIABLE=var, **KWARGS)
                    # set attributes to output for iteration
                    OUTPUT['AT'][key].data[i,c] = np.copy(OUT.data)
                    OUTPUT['AT'][key].mask[i,c] = np.copy(OUT.mask)
                    OUTPUT['AT'][key].interpolation[i,c] = np.copy(OUT.interpolation)
            elif (MODEL == 'MERRA2-hybrid') and np.any(valid):
                # read and interpolate 5-day MERRA2-Hybrid outputs
                for key,var in zip(KEYS,VARIABLES):
                    OUT = SMBcorr.interpolate_merra_hybrid(DIRECTORY, proj4_params,
                        MERRA2_REGION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS)
                    # set attributes to output for iteration
                    OUTPUT['AT'][key].data[i,c] = np.copy(OUT.data)
                    OUTPUT['AT'][key].mask[i,c] = np.copy(OUT.mask)
                    OUTPUT['AT'][key].interpolation[i,c] = np.copy(OUT.interpolation)

        # if interpolating to ATL11 crossover locations
        if CROSSOVERS:
            # extract lat/lon and convert to polar stereographic
            X,Y = transformer.transform(longitude['XT'], latitude['XT'])
            # find valid elevations for cycle
            valid = np.logical_not(delta_time['XT'].mask[:])
            i, = np.nonzero(valid)
            # convert time from ATLAS SDP to date in decimal-years
            tdec = convert_delta_time(delta_time['XT'][i])['decimal']
            if (MODEL == 'MAR') and np.any(valid):
                # read and interpolate daily MAR outputs
                for key,var in zip(KEYS,VARIABLES):
                    OUT = SMBcorr.interpolate_mar_daily(DIRECTORY, proj4_params,
                        MAR_VERSION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS)
                    # set attributes to output for iteration
                    OUTPUT['XT'][key].data[i] = np.copy(OUT.data)
                    OUTPUT['XT'][key].mask[i] = np.copy(OUT.mask)
                    OUTPUT['XT'][key].interpolation[i] = np.copy(OUT.interpolation)
                # calculate derived fields
                OUTPUT['XT']['zsmb'].data[i] = OUTPUT['XT']['zsurf'].data[i] - \
                    OUTPUT['XT']['zfirn'].data[i]
                OUTPUT['XT']['zsmb'].mask[i] = OUTPUT['XT']['zsurf'].mask[i] | \
                    OUTPUT['XT']['zfirn'].mask[i]
                OUTPUT['XT']['zaccum'].data[i] = OUTPUT['XT']['zsurf'].data[i] - \
                    OUTPUT['XT']['zfirn'].data[i] - OUTPUT['XT']['zmelt'].data[i]
                OUTPUT['XT']['zaccum'].mask[i] = OUTPUT['XT']['zsurf'].mask[i] | \
                    OUTPUT['XT']['zfirn'].mask[i] | OUTPUT['XT']['zmelt'].mask[i]
            elif (MODEL == 'RACMO') and np.any(valid):
                # read and interpolate daily RACMO outputs
                for key,var in zip(KEYS,VARIABLES):
                    OUT = SMBcorr.interpolate_racmo_daily(base_dir, proj4_params,
                        RACMO_MODEL, tdec, X[i], Y[i], VARIABLE=var, **KWARGS)
                    # set attributes to output for iteration
                    OUTPUT['XT'][key].data[i] = np.copy(OUT.data)
                    OUTPUT['XT'][key].mask[i] = np.copy(OUT.mask)
                    OUTPUT['XT'][key].interpolation[i] = np.copy(OUT.interpolation)
            elif (MODEL == 'MERRA2-hybrid') and np.any(valid):
                # read and interpolate 5-day MERRA2-Hybrid outputs
                for key,var in zip(KEYS,VARIABLES):
                    OUT = SMBcorr.interpolate_merra_hybrid(DIRECTORY, proj4_params,
                        MERRA2_REGION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS)
                    # set attributes to output for iteration
                    OUTPUT['XT'][key].data[i] = np.copy(OUT.data)
                    OUTPUT['XT'][key].mask[i] = np.copy(OUT.mask)
                    OUTPUT['XT'][key].interpolation[i] = np.copy(OUT.interpolation)

        # group attributes for beam
        IS2_atl11_corr_attrs[ptx]['description'] = ('Contains the primary science parameters '
            'for this data set')
        IS2_atl11_corr_attrs[ptx]['beam_pair'] = fileID[ptx].attrs['beam_pair']
        IS2_atl11_corr_attrs[ptx]['ReferenceGroundTrack'] = fileID[ptx].attrs['ReferenceGroundTrack']
        IS2_atl11_corr_attrs[ptx]['first_cycle'] = fileID[ptx].attrs['first_cycle']
        IS2_atl11_corr_attrs[ptx]['last_cycle'] = fileID[ptx].attrs['last_cycle']
        IS2_atl11_corr_attrs[ptx]['equatorial_radius'] = fileID[ptx].attrs['equatorial_radius']
        IS2_atl11_corr_attrs[ptx]['polar_radius'] = fileID[ptx].attrs['polar_radius']

        # geolocation, time and reference point
        # reference point
        IS2_atl11_corr[ptx]['ref_pt'] = ref_pt['AT'].copy()
        IS2_atl11_fill[ptx]['ref_pt'] = None
        IS2_atl11_dims[ptx]['ref_pt'] = None
        IS2_atl11_corr_attrs[ptx]['ref_pt'] = collections.OrderedDict()
        IS2_atl11_corr_attrs[ptx]['ref_pt']['units'] = "1"
        IS2_atl11_corr_attrs[ptx]['ref_pt']['contentType'] = "referenceInformation"
        IS2_atl11_corr_attrs[ptx]['ref_pt']['long_name'] = "Reference point number"
        IS2_atl11_corr_attrs[ptx]['ref_pt']['source'] = "ATL06"
        IS2_atl11_corr_attrs[ptx]['ref_pt']['description'] = ("The reference point is the "
            "7 digit segment_id number corresponding to the center of the ATL06 data used "
            "for each ATL11 point.  These are sequential, starting with 1 for the first "
            "segment after an ascending equatorial crossing node.")
        IS2_atl11_corr_attrs[ptx]['ref_pt']['coordinates'] = \
            "delta_time latitude longitude"
        # cycle_number
        IS2_atl11_corr[ptx]['cycle_number'] = fileID[ptx]['cycle_number'][:].copy()
        IS2_atl11_fill[ptx]['cycle_number'] = None
        IS2_atl11_dims[ptx]['cycle_number'] = None
        IS2_atl11_corr_attrs[ptx]['cycle_number'] = collections.OrderedDict()
        IS2_atl11_corr_attrs[ptx]['cycle_number']['units'] = "1"
        IS2_atl11_corr_attrs[ptx]['cycle_number']['long_name'] = "Orbital cycle number"
        IS2_atl11_corr_attrs[ptx]['cycle_number']['source'] = "ATL06"
        IS2_atl11_corr_attrs[ptx]['cycle_number']['description'] = ("Number of 91-day periods "
            "that have elapsed since ICESat-2 entered the science orbit. Each of the 1,387 "
            "reference ground tracks (RGTs) is targeted in the polar regions once "
            "every 91 days.")
        # delta time
        IS2_atl11_corr[ptx]['delta_time'] = delta_time['AT'].copy()
        IS2_atl11_fill[ptx]['delta_time'] = delta_time['AT'].fill_value
        IS2_atl11_dims[ptx]['delta_time'] = ['ref_pt','cycle_number']
        IS2_atl11_corr_attrs[ptx]['delta_time'] = collections.OrderedDict()
        IS2_atl11_corr_attrs[ptx]['delta_time']['units'] = "seconds since 2018-01-01"
        IS2_atl11_corr_attrs[ptx]['delta_time']['long_name'] = "Elapsed GPS seconds"
        IS2_atl11_corr_attrs[ptx]['delta_time']['standard_name'] = "time"
        IS2_atl11_corr_attrs[ptx]['delta_time']['calendar'] = "standard"
        IS2_atl11_corr_attrs[ptx]['delta_time']['source'] = "ATL06"
        IS2_atl11_corr_attrs[ptx]['delta_time']['description'] = ("Number of GPS "
            "seconds since the ATLAS SDP epoch. The ATLAS Standard Data Products (SDP) epoch offset "
            "is defined within /ancillary_data/atlas_sdp_gps_epoch as the number of GPS seconds "
            "between the GPS epoch (1980-01-06T00:00:00.000000Z UTC) and the ATLAS SDP epoch. By "
            "adding the offset contained within atlas_sdp_gps_epoch to delta time parameters, the "
            "time in gps_seconds relative to the GPS epoch can be computed.")
        IS2_atl11_corr_attrs[ptx]['delta_time']['coordinates'] = \
            "ref_pt cycle_number latitude longitude"
        # latitude
        IS2_atl11_corr[ptx]['latitude'] = latitude['AT'].copy()
        IS2_atl11_fill[ptx]['latitude'] = latitude['AT'].fill_value
        IS2_atl11_dims[ptx]['latitude'] = ['ref_pt']
        IS2_atl11_corr_attrs[ptx]['latitude'] = collections.OrderedDict()
        IS2_atl11_corr_attrs[ptx]['latitude']['units'] = "degrees_north"
        IS2_atl11_corr_attrs[ptx]['latitude']['contentType'] = "physicalMeasurement"
        IS2_atl11_corr_attrs[ptx]['latitude']['long_name'] = "Latitude"
        IS2_atl11_corr_attrs[ptx]['latitude']['standard_name'] = "latitude"
        IS2_atl11_corr_attrs[ptx]['latitude']['source'] = "ATL06"
        IS2_atl11_corr_attrs[ptx]['latitude']['description'] = ("Center latitude of "
            "selected segments")
        IS2_atl11_corr_attrs[ptx]['latitude']['valid_min'] = -90.0
        IS2_atl11_corr_attrs[ptx]['latitude']['valid_max'] = 90.0
        IS2_atl11_corr_attrs[ptx]['latitude']['coordinates'] = \
            "ref_pt delta_time longitude"
        # longitude
        IS2_atl11_corr[ptx]['longitude'] = longitude['AT'].copy()
        IS2_atl11_fill[ptx]['longitude'] = longitude['AT'].fill_value
        IS2_atl11_dims[ptx]['longitude'] = ['ref_pt']
        IS2_atl11_corr_attrs[ptx]['longitude'] = collections.OrderedDict()
        IS2_atl11_corr_attrs[ptx]['longitude']['units'] = "degrees_east"
        IS2_atl11_corr_attrs[ptx]['longitude']['contentType'] = "physicalMeasurement"
        IS2_atl11_corr_attrs[ptx]['longitude']['long_name'] = "Longitude"
        IS2_atl11_corr_attrs[ptx]['longitude']['standard_name'] = "longitude"
        IS2_atl11_corr_attrs[ptx]['longitude']['source'] = "ATL06"
        IS2_atl11_corr_attrs[ptx]['longitude']['description'] = ("Center longitude of "
            "selected segments")
        IS2_atl11_corr_attrs[ptx]['longitude']['valid_min'] = -180.0
        IS2_atl11_corr_attrs[ptx]['longitude']['valid_max'] = 180.0
        IS2_atl11_corr_attrs[ptx]['longitude']['coordinates'] = \
            "ref_pt delta_time latitude"

        # cycle statistics variables
        IS2_atl11_corr_attrs[ptx]['cycle_stats']['Description'] = ("The cycle_stats subgroup "
            "contains summary information about segments for each reference point, including "
            "the uncorrected mean heights for reference surfaces, blowing snow and cloud "
            "indicators, and geolocation and height misfit statistics.")
        IS2_atl11_corr_attrs[ptx]['cycle_stats']['data_rate'] = ("Data within this group "
            "are stored at the average segment rate.")

        # for each along-track dataset
        for key,val in OUTPUT['AT'].items():
            # add to output
            IS2_atl11_corr[ptx]['cycle_stats'][key] = val.copy()
            IS2_atl11_fill[ptx]['cycle_stats'][key] = val.fill_value
            IS2_atl11_dims[ptx]['cycle_stats'][key] = ['ref_pt','cycle_number']
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key] = collections.OrderedDict()
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['units'] = "meters"
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['contentType'] = "referenceInformation"
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['long_name'] = LONGNAME[key]
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['description'] = DESCRIPTION[key]
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['source'] = MODEL
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['reference'] = model_version
            IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['coordinates'] = \
                "../ref_pt ../cycle_number ../delta_time ../latitude ../longitude"

        # if crossover measurements were calculated
        if CROSSOVERS:
            # crossing track variables
            IS2_atl11_corr_attrs[ptx][XT]['Description'] = ("The crossing_track_data "
                "subgroup contains elevation data at crossover locations. These are "
                "locations where two ICESat-2 pair tracks cross, so data are available "
                "from both the datum track, for which the granule was generated, and "
                "from the crossing track.")
            IS2_atl11_corr_attrs[ptx][XT]['data_rate'] = ("Data within this group are "
                "stored at the average segment rate.")

            # reference point
            IS2_atl11_corr[ptx][XT]['ref_pt'] = ref_pt['XT'].copy()
            IS2_atl11_fill[ptx][XT]['ref_pt'] = None
            IS2_atl11_dims[ptx][XT]['ref_pt'] = None
            IS2_atl11_corr_attrs[ptx][XT]['ref_pt'] = collections.OrderedDict()
            IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['units'] = "1"
            IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['contentType'] = "referenceInformation"
            IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['long_name'] = ("fit center reference point number, "
                "segment_id")
            IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['source'] = "derived, ATL11 algorithm"
            IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['description'] = ("The reference-point number of the "
                "fit center for the datum track. The reference point is the 7 digit segment_id number "
                "corresponding to the center of the ATL06 data used for each ATL11 point.  These are "
                "sequential, starting with 1 for the first segment after an ascending equatorial "
                "crossing node.")
            IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['coordinates'] = \
                "delta_time latitude longitude"

            # reference ground track of the crossing track
            IS2_atl11_corr[ptx][XT]['rgt'] = fileID[ptx][XT]['rgt'][:].copy()
            IS2_atl11_fill[ptx][XT]['rgt'] = fileID[ptx][XT]['rgt'].attrs['_FillValue']
            IS2_atl11_dims[ptx][XT]['rgt'] = None
            IS2_atl11_corr_attrs[ptx][XT]['rgt'] = collections.OrderedDict()
            IS2_atl11_corr_attrs[ptx][XT]['rgt']['units'] = "1"
            IS2_atl11_corr_attrs[ptx][XT]['rgt']['contentType'] = "referenceInformation"
            IS2_atl11_corr_attrs[ptx][XT]['rgt']['long_name'] = "crossover reference ground track"
            IS2_atl11_corr_attrs[ptx][XT]['rgt']['source'] = "ATL06"
            IS2_atl11_corr_attrs[ptx][XT]['rgt']['description'] = "The RGT number for the crossing data."
            IS2_atl11_corr_attrs[ptx][XT]['rgt']['coordinates'] = \
                "ref_pt delta_time latitude longitude"
            # cycle_number of the crossing track
            IS2_atl11_corr[ptx][XT]['cycle_number'] = fileID[ptx][XT]['cycle_number'][:].copy()
            IS2_atl11_fill[ptx][XT]['cycle_number'] = fileID[ptx][XT]['cycle_number'].attrs['_FillValue']
            IS2_atl11_dims[ptx][XT]['cycle_number'] = None
            IS2_atl11_corr_attrs[ptx][XT]['cycle_number'] = collections.OrderedDict()
            IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['units'] = "1"
            IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['long_name'] = "crossover cycle number"
            IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['source'] = "ATL06"
            IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['description'] = ("Cycle number for the "
                "crossing data. Number of 91-day periods that have elapsed since ICESat-2 entered "
                "the science orbit. Each of the 1,387 reference ground tracks (RGTs) is targeted "
                "in the polar regions once every 91 days.")
            # delta time of the crossing track
            IS2_atl11_corr[ptx][XT]['delta_time'] = delta_time['XT'].copy()
            IS2_atl11_fill[ptx][XT]['delta_time'] = delta_time['XT'].fill_value
            IS2_atl11_dims[ptx][XT]['delta_time'] = ['ref_pt']
            IS2_atl11_corr_attrs[ptx][XT]['delta_time'] = {}
            IS2_atl11_corr_attrs[ptx][XT]['delta_time']['units'] = "seconds since 2018-01-01"
            IS2_atl11_corr_attrs[ptx][XT]['delta_time']['long_name'] = "Elapsed GPS seconds"
            IS2_atl11_corr_attrs[ptx][XT]['delta_time']['standard_name'] = "time"
            IS2_atl11_corr_attrs[ptx][XT]['delta_time']['calendar'] = "standard"
            IS2_atl11_corr_attrs[ptx][XT]['delta_time']['source'] = "ATL06"
            IS2_atl11_corr_attrs[ptx][XT]['delta_time']['description'] = ("Number of GPS "
                "seconds since the ATLAS SDP epoch. The ATLAS Standard Data Products (SDP) epoch offset "
                "is defined within /ancillary_data/atlas_sdp_gps_epoch as the number of GPS seconds "
                "between the GPS epoch (1980-01-06T00:00:00.000000Z UTC) and the ATLAS SDP epoch. By "
                "adding the offset contained within atlas_sdp_gps_epoch to delta time parameters, the "
                "time in gps_seconds relative to the GPS epoch can be computed.")
            IS2_atl11_corr_attrs[ptx][XT]['delta_time']['coordinates'] = \
                "ref_pt latitude longitude"
            # latitude of the crossover measurement
            IS2_atl11_corr[ptx][XT]['latitude'] = latitude['XT'].copy()
            IS2_atl11_fill[ptx][XT]['latitude'] = latitude['XT'].fill_value
            IS2_atl11_dims[ptx][XT]['latitude'] = ['ref_pt']
            IS2_atl11_corr_attrs[ptx][XT]['latitude'] = collections.OrderedDict()
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['units'] = "degrees_north"
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['contentType'] = "physicalMeasurement"
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['long_name'] = "crossover latitude"
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['standard_name'] = "latitude"
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['source'] = "ATL06"
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['description'] = ("Center latitude of "
                "selected segments")
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['valid_min'] = -90.0
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['valid_max'] = 90.0
            IS2_atl11_corr_attrs[ptx][XT]['latitude']['coordinates'] = \
                "ref_pt delta_time longitude"
            # longitude of the crossover measurement
            IS2_atl11_corr[ptx][XT]['longitude'] = longitude['XT'].copy()
            IS2_atl11_fill[ptx][XT]['longitude'] = longitude['XT'].fill_value
            IS2_atl11_dims[ptx][XT]['longitude'] = ['ref_pt']
            IS2_atl11_corr_attrs[ptx][XT]['longitude'] = collections.OrderedDict()
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['units'] = "degrees_east"
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['contentType'] = "physicalMeasurement"
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['long_name'] = "crossover longitude"
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['standard_name'] = "longitude"
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['source'] = "ATL06"
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['description'] = ("Center longitude of "
                "selected segments")
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['valid_min'] = -180.0
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['valid_max'] = 180.0
            IS2_atl11_corr_attrs[ptx][XT]['longitude']['coordinates'] = \
                "ref_pt delta_time latitude"

            # for each crossover dataset
            for key,val in OUTPUT['XT'].items():
                # add to output
                IS2_atl11_corr[ptx][XT][key] = val.copy()
                IS2_atl11_fill[ptx][XT][key] = val.fill_value
                IS2_atl11_dims[ptx][XT][key] = ['ref_pt']
                IS2_atl11_corr_attrs[ptx][XT][key] = collections.OrderedDict()
                IS2_atl11_corr_attrs[ptx][XT][key]['units'] = "meters"
                IS2_atl11_corr_attrs[ptx][XT][key]['contentType'] = "referenceInformation"
                IS2_atl11_corr_attrs[ptx][XT][key]['long_name'] = LONGNAME[key]
                IS2_atl11_corr_attrs[ptx][XT][key]['description'] = DESCRIPTION[key]
                IS2_atl11_corr_attrs[ptx][XT][key]['source'] = MODEL
                IS2_atl11_corr_attrs[ptx][XT][key]['reference'] = model_version
                IS2_atl11_corr_attrs[ptx][XT][key]['coordinates'] = \
                    "ref_pt delta_time latitude longitude"

    # output HDF5 files with interpolated surface mass balance data
    args = (PRD,model_version,TRK,GRAN,SCYC,ECYC,RL,VERS,AUX)
    file_format = '{0}_{1}_{2}{3}_{4}{5}_{6}_{7}{8}.h5'
    # print file information
    print('\t{0}'.format(file_format.format(*args))) if VERBOSE else None
    HDF5_ATL11_corr_write(IS2_atl11_corr, IS2_atl11_corr_attrs,
        CLOBBER=True, INPUT=os.path.basename(FILE), CROSSOVERS=CROSSOVERS,
        FILL_VALUE=IS2_atl11_fill, DIMENSIONS=IS2_atl11_dims,
        FILENAME=os.path.join(ddir,file_format.format(*args)))
    # change the permissions mode
    os.chmod(os.path.join(ddir,file_format.format(*args)), MODE)
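
A quick sketch of how the ATL11 filename pattern used in interp_SMB_ICESat2 splits a granule name into its parts; the filename below is invented for illustration:

import re

rx = re.compile(r'(processed_)?(ATL\d{2})_(\d{4})(\d{2})_(\d{2})(\d{2})_'
    r'(\d{3})_(\d{2})(.*?).h5$')
# hypothetical granule name: track 0048, region 11, cycles 03-15, release 005
example = 'ATL11_004811_0315_005_01.h5'
SUB,PRD,TRK,GRAN,SCYC,ECYC,RL,VERS,AUX = rx.findall(example).pop()
print(PRD, TRK, GRAN, SCYC, ECYC, RL, VERS)  # ATL11 0048 11 03 15 005 01
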
Beispiel #38
0
version = info.get('version', '0.0.1')
major_version, minor_version, _ = version.split('.', 2)
major_version = int(major_version)
minor_version = int(minor_version)
name = 'trytond_stock_supply_day'

download_url = 'http://downloads.tryton.org/%s.%s/' % (major_version,
                                                       minor_version)
if minor_version % 2:
    version = '%s.%s.dev0' % (major_version, minor_version)
    download_url = ('hg+http://hg.tryton.org/modules/%s#egg=%s-%s' %
                    (name[8:], name, version))

requires = []
for dep in info.get('depends', []):
    if not re.match(r'(ir|res|webdav)(\W|$)', dep):
        requires.append(get_require_version('trytond_%s' % dep))
requires.append(get_require_version('trytond'))

setup(
    name=name,
    version=version,
    description='Tryton module to add supply weekdays',
    long_description=read('README'),
    author='Tryton',
    author_email='*****@*****.**',
    url='http://www.tryton.org/',
    download_url=download_url,
    keywords='tryton supply day',
    package_dir={'trytond.modules.stock_supply_day': '.'},
    packages=[
def normalize_tanchor(value):

    def normalize_single_tanchor(value, point='certain'):
        singlematch = re.compile(r"\(after ([^']+), before ([^']+)\)")
        if re.match(singlematch, value):
            singleout = singlematch.findall(value)
            if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', singleout[0][0]):
                after = datetime.strptime(singleout[0][0], '%Y-%m-%d')
            else:
                ba, bb, ea, after = normalize_time(singleout[0][0])
            if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', singleout[0][1]):
                before = datetime.strptime(singleout[0][1], '%Y-%m-%d')
            else:
                before, bb, ea, eb = normalize_time(singleout[0][1])
            return after, before
        elif 'after' in value:
            value = value.replace('after ', '', 1)  # drop the 'after ' prefix (str.strip removes characters, not a prefix)
            if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', value):
                return datetime.strptime(value, '%Y-%m-%d'), None
            else:
                ba, bb, ea, eb = normalize_time(value)
                return eb, None
        elif 'before' in value:
            value = value.replace('before ', '', 1)  # drop the 'before ' prefix
            if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', value):
                return None, datetime.strptime(value, '%Y-%m-%d')
            else:
                ba, bb, ea, eb = normalize_time(value)
                return None, ba

        elif re.match(r'\d{4}-\d{1,2}-\d{1,2}$', value):
            after = datetime.strptime(value, '%Y-%m-%d')
            return after, after
        else:
            ## temporal code
            ba, bb, ea, eb = normalize_time(value)
            if point == 'begin':
                return ba, ba
            elif point == 'end':
                return eb, eb
            else:
                return ba, bb, ea, eb

    def normalize_multi_tanchor(value):
        # print(value)
        if 'freq' in value:
            multimatch = re.compile(r"\(begin:(.+), end:(.+), freq:(.+)\)")
        elif 'dur' in value:
            multimatch = re.compile(r"\(begin:(.+), end:(.+), dur:(.+)=\)")
        else:
            multimatch = re.compile(r"\(begin:(.+), end:(.+)\)")
        if re.match(multimatch, value):
            mout = multimatch.search(value)
            ba, bb = normalize_single_tanchor(mout.group(1), 'begin')
            ea, eb = normalize_single_tanchor(mout.group(2), 'end')
            return ba, bb, ea, eb


    if 'AND' in value or 'OR' in value:
        return None
    else:
        if 'dis=' in value:
            return None
        if re.match(r"\(begin:(.+), end:(.+)\)", value):
            return normalize_multi_tanchor(value)
        else:
            return normalize_single_tanchor(value)
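
For reference, a small sketch of the time-anchor string shapes that the regular expressions in normalize_tanchor expect; the dates are made up:

import re

single = "(after 2010-01-01, before 2010-03-15)"
multi = "(begin:2010-01-01, end:2010-03-15)"
singlematch = re.compile(r"\(after ([^']+), before ([^']+)\)")
print(singlematch.findall(single))            # [('2010-01-01', '2010-03-15')]
multimatch = re.compile(r"\(begin:(.+), end:(.+)\)")
print(multimatch.search(multi).groups())      # ('2010-01-01', '2010-03-15')
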
Beispiel #40
0
    def gagent_check_fstrim(self, test, params, env):
        """
        Execute "guest-fstrim" command to guest agent
        :param test: kvm test object
        :param params: Dictionary with the test parameters
        :param env: Dictionary with test environment.

        """
        def get_host_scsi_disk():
            """
            Get the latest SCSI disk emulated by the scsi_debug module.
            Return the device name and the host id.
            """
            scsi_disk_info = process.system_output(
                avo_path.find_command('lsscsi'), shell=True).splitlines()
            scsi_debug = [_ for _ in scsi_disk_info if 'scsi_debug' in _][-1]
            scsi_debug = scsi_debug.split()
            host_id = scsi_debug[0][1:-1]
            device_name = scsi_debug[-1]
            return (host_id, device_name)

        def get_guest_discard_disk(session):
            """
            Get disk without partitions in guest.
            """
            list_disk_cmd = "ls /dev/[sh]d*|sed 's/[0-9]//p'|uniq -u"
            disk = session.cmd_output(list_disk_cmd).splitlines()[0]
            return disk

        def get_provisioning_mode(device, host_id):
            """
            Get disk provisioning mode; the value is usually 'writesame_16',
            depending on the params for the scsi_debug module.
            """
            device_name = os.path.basename(device)
            path = "/sys/block/%s/device/scsi_disk" % device_name
            path += "/%s/provisioning_mode" % host_id
            return utils.read_one_line(path).strip()

        def get_allocation_bitmap():
            """
            get block allocation bitmap
            """
            path = "/sys/bus/pseudo/drivers/scsi_debug/map"
            try:
                return utils.read_one_line(path).strip()
            except IOError:
                logging.warn("could not get bitmap info, path '%s' does "
                             "not exist", path)
            return ""

        for vm in env.get_all_vms():
            if vm:
                vm.destroy()
                env.unregister_vm(vm.name)
        host_id, disk_name = get_host_scsi_disk()
        provisioning_mode = get_provisioning_mode(disk_name, host_id)
        logging.info("Current provisioning_mode = '%s'", provisioning_mode)
        bitmap = get_allocation_bitmap()
        if bitmap:
            logging.debug("block allocation bitmap: %s" % bitmap)
            raise error.TestError("block allocation bitmap"
                                  " not empty before test.")
        vm_name = params["main_vm"]
        test_image = "scsi_debug"
        params["start_vm"] = "yes"
        params["image_name_%s" % test_image] = disk_name
        params["image_format_%s" % test_image] = "raw"
        params["image_raw_device_%s" % test_image] = "yes"
        params["force_create_image_%s" % test_image] = "no"
        params["drive_format_%s" % test_image] = "scsi-block"
        params["drv_extra_params_%s" % test_image] = "discard=on"
        params["images"] = " ".join([params["images"], test_image])

        error_context.context("boot guest with disk '%s'" % disk_name, logging.info)
        env_process.preprocess_vm(test, params, env, vm_name)

        self.setup(test, params, env)
        timeout = float(params.get("login_timeout", 240))
        session = self.vm.wait_for_login(timeout=timeout)
        device_name = get_guest_discard_disk(session)

        error_context.context("format disk '%s' in guest" % device_name, logging.info)
        format_disk_cmd = params["format_disk_cmd"]
        format_disk_cmd = format_disk_cmd.replace("DISK", device_name)
        session.cmd(format_disk_cmd)

        error_context.context("mount disk with discard options '%s'" % device_name,
                              logging.info)
        mount_disk_cmd = params["mount_disk_cmd"]
        mount_disk_cmd = mount_disk_cmd.replace("DISK", device_name)
        session.cmd(mount_disk_cmd)

        error_context.context("write the disk with dd command", logging.info)
        write_disk_cmd = params["write_disk_cmd"]
        session.cmd(write_disk_cmd)

        error_context.context("Delete the file created before on disk", logging.info)
        delete_file_cmd = params["delete_file_cmd"]
        session.cmd(delete_file_cmd)

        # check the bitmap before trim
        bitmap_before_trim = get_allocation_bitmap()
        if not re.match(r"\d+-\d+", bitmap_before_trim):
            raise error.TestFail("didn't get the bitmap of the target disk")
        error_context.context("the bitmap_before_trim is %s" % bitmap_before_trim,
                              logging.info)
        total_block_before_trim = abs(sum([eval(i) for i in
                                           bitmap_before_trim.split(',')]))
        error_context.context("the total_block_before_trim is %d"
                              % total_block_before_trim, logging.info)

        error_context.context("execute the guest-fstrim cmd", logging.info)
        self.gagent.fstrim()

        # check the bitmap after trim
        bitmap_after_trim = get_allocation_bitmap()
        if not re.match(r"\d+-\d+", bitmap_after_trim):
            raise error.TestFail("didn't get the bitmap of the target disk")
        error_context.context("the bitmap_after_trim is %s" % bitmap_after_trim,
                              logging.info)
        total_block_after_trim = abs(sum([eval(i) for i in
                                          bitmap_after_trim.split(',')]))
        error_context.context("the total_block_after_trim is %d"
                              % total_block_after_trim, logging.info)

        if total_block_after_trim > total_block_before_trim:
            raise error.TestFail("the bitmap_after_trim is larger, the command"
                                 " guest-fstrim may not work")
        if self.vm:
            self.vm.destroy()
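
The trim check above counts blocks by eval-ing each "start-end" token of the scsi_debug allocation bitmap; a minimal sketch of the same total on a made-up bitmap string (assuming start <= end in every range):

# hypothetical allocation bitmap as returned by get_allocation_bitmap()
bitmap = "0-511,1024-2047"
total_blocks = sum(abs(int(start) - int(end))
                   for start, end in (token.split('-') for token in bitmap.split(',')))
print(total_blocks)  # 1534, matching abs(sum(eval(i) for i in bitmap.split(',')))
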
Beispiel #41
0
 def validate_path(cls, path):
     return re.match(r'^/[^><|&()?]*$', path)
Beispiel #42
0
    def extractChapterUrlsAndMetadata(self):

        # fetch the chapter.  From that we will get almost all the
        # metadata and chapter list

        url = self.url
        logger.debug("URL: " + url)

        # use BeautifulSoup HTML parser to make everything easier to find.
        try:
            data = self._fetchUrl(url)
            # non-existent/removed story urls get thrown to the front page.
            if "<h4>Featured Story</h4>" in data:
                raise exceptions.StoryDoesNotExist(self.url)
            soup = self.make_soup(data)
        except HTTPError as e:
            if e.code == 404:
                raise exceptions.StoryDoesNotExist(self.url)
            else:
                raise e

        # if blocked, attempt login.
        if soup.find("div", {"class": "blocked"}) or soup.find(
                "li", {"class": "blocked"}):
            if self.performLogin(url):  # performLogin raises
                # FailedToLogin if it fails.
                soup = self.make_soup(self._fetchUrl(url, usecache=False))

        divstory = soup.find('div', id='story')
        storya = divstory.find('a', href=re.compile(r"^/story/\d+$"))
        if storya:  # if there's a story link in the divstory header, this is a chapter page.
            # normalize story URL on chapter list.
            self.story.setMetadata('storyId', storya['href'].split('/', )[2])
            url = "https://" + self.getSiteDomain() + storya['href']
            logger.debug("Normalizing to URL: " + url)
            self._setURL(url)
            try:
                soup = self.make_soup(self._fetchUrl(url))
            except HTTPError as e:
                if e.code == 404:
                    raise exceptions.StoryDoesNotExist(self.url)
                else:
                    raise e

        # if blocked, attempt login.
        if soup.find("div", {"class": "blocked"}) or soup.find(
                "li", {"class": "blocked"}):
            if self.performLogin(url):  # performLogin raises
                # FailedToLogin if it fails.
                soup = self.make_soup(self._fetchUrl(url, usecache=False))

        # title - first h4 tag will be title.
        titleh4 = soup.find('div', {'class': 'storylist'}).find('h4')
        self.story.setMetadata('title', stripHTML(titleh4.a))

        if 'Deleted story' in self.story.getMetadata('title'):
            raise exceptions.StoryDoesNotExist("This story was deleted. %s" %
                                               self.url)

        # Find authorid and URL from... author url.
        a = soup.find('span', {
            'class': 'author'
        }).find('a', href=re.compile(r"^/a/"))
        self.story.setMetadata('authorId', a['href'].split('/')[2])
        self.story.setMetadata('authorUrl', 'https://' + self.host + a['href'])
        self.story.setMetadata('author', a.string)

        # description
        storydiv = soup.find("div", {"id": "story"})
        self.setDescription(
            url,
            storydiv.find("blockquote", {
                'class': 'summary'
            }).p)
        #self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string)

        # most of the meta data is here:
        metap = storydiv.find("p", {"class": "meta"})
        self.story.addToList(
            'category',
            metap.find("a", href=re.compile(r"^/category/\d+")).string)

        # warnings
        # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span>
        spanreq = metap.find("span", {"class": "story-warnings"})
        if spanreq:  # can be no warnings.
            for a in spanreq.findAll("a"):
                self.story.addToList('warnings', a['title'])

        ## perhaps not the most efficient way to parse this, using
        ## regexps for each rather than something more complex, but
        ## IMO, it's more readable and amenable to change.
        metastr = stripHTML(unicode(metap)).replace('\n', ' ').replace(
            '\t', ' ').replace(u'\u00a0', ' ')

        m = re.match(r".*?Rating: (.+?) -.*?", metastr)
        if m:
            self.story.setMetadata('rating', m.group(1))

        m = re.match(r".*?Genres: (.+?) -.*?", metastr)
        if m:
            for g in m.group(1).split(','):
                self.story.addToList('genre', g)

        m = re.match(r".*?Characters: (.*?) -.*?", metastr)
        if m:
            for g in m.group(1).split(','):
                if g:
                    self.story.addToList('characters', g)

        m = re.match(r".*?Published: ([0-9-]+?) -.*?", metastr)
        if m:
            self.story.setMetadata('datePublished',
                                   makeDate(m.group(1), "%Y-%m-%d"))

        # Updated can have more than one space after it. <shrug>
        m = re.match(r".*?Updated: ([0-9-]+?) +-.*?", metastr)
        if m:
            self.story.setMetadata('dateUpdated',
                                   makeDate(m.group(1), "%Y-%m-%d"))

        m = re.match(r".*? - ([0-9,]+?) words.*?", metastr)
        if m:
            self.story.setMetadata('numWords', m.group(1))

        if metastr.endswith("Complete"):
            self.story.setMetadata('status', 'Completed')
        else:
            self.story.setMetadata('status', 'In-Progress')

        # get the chapter list first this time because that's how we
        # detect the need to login.
        storylistul = soup.find('ul', {'class': 'storylist'})
        if not storylistul:
            # no list found, so it's a one-chapter story.
            self.add_chapter(self.story.getMetadata('title'), url)
        else:
            chapterlistlis = storylistul.findAll('li')
            for chapterli in chapterlistlis:
                if "blocked" in chapterli['class']:
                    # paranoia check.  We should already be logged in by now.
                    raise exceptions.FailedToLogin(url, self.username)
                else:
                    #print "chapterli.h4.a (%s)"%chapterli.h4.a
                    self.add_chapter(
                        chapterli.h4.a.string, u'https://%s%s' %
                        (self.getSiteDomain(), chapterli.h4.a['href']))
        return
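
A short sketch of how the metadata regexes above pull fields out of the flattened meta paragraph; the sample string is invented and only approximates the site's format:

import re

metastr = ("Rating: Teen - Genres: Drama, Romance - Characters: Alice, Bob - "
           "Published: 2020-01-02 - Updated: 2020-02-03  - 12,345 words - Complete")
print(re.match(r".*?Rating: (.+?) -.*?", metastr).group(1))         # Teen
print(re.match(r".*?Updated: ([0-9-]+?) +-.*?", metastr).group(1))  # 2020-02-03
print(re.match(r".*? - ([0-9,]+?) words.*?", metastr).group(1))     # 12,345
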
Beispiel #43
0
import re
import sys
import metricbeat
import unittest


@unittest.skipUnless(re.match("(?i)win|linux|darwin|freebsd", sys.platform),
                     "os")
class Test(metricbeat.BaseTest):
    def test_drop_fields(self):

        self.render_config_template(
            modules=[{
                "name": "system",
                "metricsets": ["cpu"],
                "period": "1s"
            }],
            processors=[{
                "drop_fields": {
                    "when": "range.system.cpu.system.pct.lt: 0.1",
                    "fields": ["system.cpu.load"],
                },
            }])
        proc = self.start_beat()
        self.wait_until(lambda: self.output_lines() > 0)
        proc.check_kill_and_wait()

        output = self.read_output_json()
        self.assertEqual(len(output), 1)
        evt = output[0]
        self.assert_fields_are_documented(evt)
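
The skipUnless guard above keys off sys.platform; a tiny sketch of which values the pattern accepts:

import re

for platform in ("linux", "darwin", "win32", "freebsd12", "sunos5"):
    print(platform, bool(re.match("(?i)win|linux|darwin|freebsd", platform)))
# only sunos5 fails the match, so the test would be skipped there
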
Beispiel #44
0
 def validate_path(cls, path):
     return path == "/" or re.match(r'^[^/><|&()#?]+$', path)
  exit()


# begin reading file
header_counter = 0
message = ""

input_file = open(sys.argv[1], 'r')
output_file = open('results.xml', 'w+')
output_file.write('<?xml version="1.0" encoding="UTF-8" ?>\n')
output_file.write('<testsuites>\n\t<testsuite>\n')
for line in input_file:
  line = line.rstrip()

  # check for header
  matchObj = re.match(r'\d+: (.+)!', line, re.M)

  if matchObj:

    # print message for previous issue
    if header_counter > 0:
      output_file.write('\t\t\t<failure message="%s">\n\t\t\t</failure>\n\t\t</testcase>\n' % message)

    # print header for current issue
    output_file.write('\t\t<testcase name="%s">\n' % matchObj.group(1))
    message = ""
    header_counter += 1
  else:
    message = message + "\n" + line

# print message for last detected issue
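
A small sketch of the header pattern the converter matches on each log line; the sample lines are invented:

import re

for line in ("12: Missing semicolon!", "    at foo.c:34"):
    matchObj = re.match(r'\d+: (.+)!', line, re.M)
    print(matchObj.group(1) if matchObj else "continuation: " + line.strip())
# -> Missing semicolon
# -> continuation: at foo.c:34
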
Beispiel #46
0
def parse_line(line):
    match = re.match(FOOD_PATT, line)
    ingredients = [s.strip() for s in match.group(1).split()]
    alergens = [s.strip() for s in match.group(2).split(',')]
    return Food(ingredients, alergens)
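
parse_line above relies on a FOOD_PATT regex defined elsewhere; a minimal sketch assuming the input lines look like "a b c (contains x, y)" (the pattern below is a hypothetical stand-in):

import re

FOOD_PATT = r'^(.+) \(contains (.+)\)$'   # hypothetical stand-in
line = "mxmxvkd kfcds sqjhc nhms (contains dairy, fish)"
match = re.match(FOOD_PATT, line)
print([s.strip() for s in match.group(1).split()])     # ['mxmxvkd', 'kfcds', 'sqjhc', 'nhms']
print([s.strip() for s in match.group(2).split(',')])  # ['dairy', 'fish']
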
import re

regex = u'откр(о|ы)(((т[^к])|(т$))|(в[^а]ш?)|й|л|е|ю|я)'

fileName = input(
    'Enter the name of the file in which to find all forms of the verb \'открыть\': '
)
f = open(fileName, 'r', encoding='utf-8')

list_open = []  # list of the forms of the verb 'открыть' that were found

for r in f:
    r = r.lower()
    list_word = r.split(' ')
    for w in list_word:
        w = w.strip('.,:;-()?!%*\'\"\n\r\t\f\v')
        m = re.match(regex, w)  # match the pattern at the start of the string
        if m is not None:
            if w not in list_open:  # only record forms that have not been seen yet
                list_open.append(w)

f.close()
print('Forms of the verb \'открыть\':')
c = 0  # number of words found
for i in list_open:
    print(i)
    c = c + 1
print('Total: ' + str(c))
async def get_the_final(result: str) -> str:
#def get_the_final(result: str) -> str:
    # get the five answers from the questionnaire result
    global msg
    msg = ''
    r_res = re.compile(r'(\d)\s+(\d)\s+(\d)\s+(\d)\s+(\d+)\W+(\d+)')
    m_res = re.match(r_res,result)
    a1,a2,a3,a4,minm,maxm = m_res.groups()
    # regex groups are strings; convert the answers to integers before comparison
    a1,a2,a3,a4 = int(a1),int(a2),int(a3),int(a4)
    
    url = 'https://www.yuque.com/api/v2/repos/385391/docs/2185141'
    url_head = {'User-Agent':'Mozilla/5.0','content-type':'application/json','X-Auth-Token':'{token}'}
    r = req.get(url,headers = url_head)
    r_content = r.text.replace('\\"','"')
    # list for tallying scores
    goal=['']
    # lists for tallying '+' and '-' marks
    list1=['']
    list2=['']
    # counter
    num = 1
    pd_table = pd.read_html(r_content, encoding='utf-8', header=0)[0]
    PCindex = list(pd_table.index)

    for e_tr in PCindex:
        # initialize the three lists for this row
        goal.append('')
        list1.append('')
        list2.append('')
            
        if  pd_table.loc[e_tr,'内存升级'] != 0:
            if a1 != 1:
                list1[num] = list1[num]+'+'
            if a3 == 1:
                list1[num] = list1[num]+'+'
        if pd_table.loc[e_tr,'内存/G'] == 16:
            if a1 == 1:
                list2[num] = list2[num]+'-'
            if a1 == 3 or a1 == 4:
                list1[num] = list1[num]+'+'
            if a3 != 1:
                list1[num] = list1[num]+'+'
        if pd_table.loc[e_tr,'屏幕色域(数字为%)'] == 0:
            if a3 == 1:
                list2[num] += '-'
        if pd_table.loc[e_tr,'用途'] == 3:
            if a1 == 1:
                list2[num] += '-'
            if a2 == 1:
                list2[num] += '-'
            if a2 == 3:
                list2[num] += '+'
        if pd_table.loc[e_tr,'屏幕描述'] == 1 or pd_table.loc[e_tr,'屏幕描述'] == 2:
            if a1 == 3:
                list1[num] += '+'
            if a3 != 1:
                list1[num] += '+'
            
    
        if len(list1[num]) > 0:
            if len(list1[num]) > len(list2[num]):
                if int(minm) < int(pd_table.loc[e_tr,'参考售价/元']) and int(pd_table.loc[e_tr,'参考售价/元']) < int(maxm):
                    msg += '\n'+'Recommended model: %s' % pd_table.loc[e_tr,'产品型号']
                    msg += '\n'+'CPU: %s' % pd_table.loc[e_tr,'处理器']
                    msg += '\n'+'Color gamut: %s' % pd_table.loc[e_tr,'屏幕色域(数字为%)']
                    msg += '\n'+'Reference price: %d' % pd_table.loc[e_tr,'参考售价/元'] +'\n'
                    goal[num] = list1[num]
            if len(list1[num]) == 1 and len(list2[num]) > 1:
                msg += '\n'+'Recommended model: %s' % pd_table.loc[e_tr,'产品型号']
                msg += '\n'+'CPU: %s' % pd_table.loc[e_tr,'处理器']
                msg += '\n'+'Color gamut: %s' % pd_table.loc[e_tr,'屏幕色域(数字为%)']
                msg += '\n'+'Reference price: %d' % pd_table.loc[e_tr,'参考售价/元']+'\n'
        else:
            goal[num]=''
        num += 1
    try:
        return f'{msg}'
    except:
        return 'No matching model was found; try adjusting your requirements'
    
    
#print(get_the_final("1 1 1 1 4000-5000"))
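
A quick check of the answer-parsing regex above against the sample input from the commented-out test call:

import re

r_res = re.compile(r'(\d)\s+(\d)\s+(\d)\s+(\d)\s+(\d+)\W+(\d+)')
print(re.match(r_res, "1 1 1 1 4000-5000").groups())
# ('1', '1', '1', '1', '4000', '5000')
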
Beispiel #49
0
    def _get_storage_config(self, host):
        pvs = self.dbapi.ipv_get_by_ihost(host.id)

        instance_backing = constants.LVG_NOVA_BACKING_IMAGE
        concurrent_disk_operations = constants.LVG_NOVA_PARAM_DISK_OPS_DEFAULT

        final_pvs = []
        adding_pvs = []
        removing_pvs = []
        nova_lvg_uuid = None
        for pv in pvs:
            if (pv.lvm_vg_name == constants.LVG_NOVA_LOCAL and
                    pv.pv_state != constants.PV_ERR):
                pv_path = pv.disk_or_part_device_path
                if (pv.pv_type == constants.PV_TYPE_PARTITION and
                        '-part' not in pv.disk_or_part_device_path and
                        '-part' not in pv.lvm_vg_name):
                    # add the disk partition to the disk path
                    partition_number = re.match('.*?([0-9]+)$',
                                                pv.lvm_pv_name).group(1)
                    pv_path += "-part%s" % partition_number

                if (pv.pv_state == constants.PV_ADD):
                    adding_pvs.append(pv_path)
                    final_pvs.append(pv_path)
                elif(pv.pv_state == constants.PV_DEL):
                    removing_pvs.append(pv_path)
                else:
                    final_pvs.append(pv_path)
                nova_lvg_uuid = pv.ilvg_uuid

        if nova_lvg_uuid:
            lvg = self.dbapi.ilvg_get(nova_lvg_uuid)

            instance_backing = lvg.capabilities.get(
                constants.LVG_NOVA_PARAM_BACKING)
            concurrent_disk_operations = lvg.capabilities.get(
                constants.LVG_NOVA_PARAM_DISK_OPS)

        global_filter, update_filter = self._get_lvm_global_filter(host)

        values = {
            'openstack::nova::storage::final_pvs': final_pvs,
            'openstack::nova::storage::adding_pvs': adding_pvs,
            'openstack::nova::storage::removing_pvs': removing_pvs,
            'openstack::nova::storage::lvm_global_filter': global_filter,
            'openstack::nova::storage::lvm_update_filter': update_filter,
            'openstack::nova::storage::instance_backing': instance_backing,
            'openstack::nova::storage::concurrent_disk_operations':
                concurrent_disk_operations, }

        # If NOVA is a service on a ceph-external backend, use the ephemeral_pool
        # and ceph_conf file that are stored in that DB entry.
        # If NOVA is not on any ceph-external backend, it must be on the internal
        # ceph backend with default "ephemeral" pool and default "/etc/ceph/ceph.conf"
        # config file
        sb_list = self.dbapi.storage_backend_get_list_by_type(
            backend_type=constants.SB_TYPE_CEPH_EXTERNAL)
        if sb_list:
            for sb in sb_list:
                if constants.SB_SVC_NOVA in sb.services:
                    ceph_ext_obj = self.dbapi.storage_ceph_external_get(sb.id)
                    images_rbd_pool = sb.capabilities.get('ephemeral_pool')
                    images_rbd_ceph_conf = \
                        constants.CEPH_CONF_PATH + os.path.basename(ceph_ext_obj.ceph_conf)

                    values.update({'openstack::nova::storage::images_rbd_pool':
                                   images_rbd_pool,
                                   'openstack::nova::storage::images_rbd_ceph_conf':
                                   images_rbd_ceph_conf, })
        return values
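
The partition handling above rebuilds the "-partN" suffix from the trailing digits of pv.lvm_pv_name. A small standalone sketch of just that regex step, using made-up device names:

import re

# Made-up physical-volume names and disk paths, only to illustrate how the
# trailing partition number is extracted and re-appended above.
examples = [
    ('/dev/sda3', '/dev/disk/by-path/pci-0000:00:1f.2-ata-1.0'),
    ('/dev/nvme0n1p2', '/dev/disk/by-path/pci-0000:3b:00.0-nvme-1'),
]
for lvm_pv_name, disk_path in examples:
    partition_number = re.match('.*?([0-9]+)$', lvm_pv_name).group(1)
    print(disk_path + "-part%s" % partition_number)
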
Beispiel #50
0
def DelugeUri(v):
    try:
        return re.match(r'(([^:]+):([^@]+)@([^:$]+)(:([0-9]+))?)', v).group(0)
    except AttributeError:  # re.match returned None
        raise argparse.ArgumentTypeError("String '{}' does not match required format".format(v))
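
DelugeUri is written as an argparse type validator; a minimal usage sketch, assuming DelugeUri from the snippet above is in scope (the option name is made up):

import argparse

parser = argparse.ArgumentParser()
# Hypothetical option name; values that do not match raise ArgumentTypeError.
parser.add_argument('--deluge-uri', type=DelugeUri)

args = parser.parse_args(['--deluge-uri', 'admin:secret@localhost:58846'])
print(args.deluge_uri)  # admin:secret@localhost:58846
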
 def __call__(self, cfg, gpu_no):
     print("calling program with gpu "+str(gpu_no))
     cmd = ['python3', self.program, '--cfg', str(cfg), str(gpu_no)]
     outs = ""
     #outputval = 0
     outputval = ""
     try:
         outs = str(check_output(cmd,stderr=STDOUT, timeout=40000))
         if os.path.isfile(logfile): 
             with open(logfile,'a') as f_handle:
                 f_handle.write(outs)
         else:
             with open(logfile,'w') as f_handle:
                 f_handle.write(outs)
         outs = outs.split("\\n")
         
         #TODO_CHRIS hacky solution
         #outputval = 0
         #for i in range(len(outs)-1,1,-1):
         for i in range(len(outs)-1,-1,-1):
             #if re.match("^\d+?\.\d+?$", outs[-i]) is None:
             #CHRIS changed outs[-i] to outs[i]
             print(outs[i])
             # remember this line if it looks like a printed "(x, y)" tuple
             if re.match(r"^\(\-?\d+\.?\d*e?\+?\-?\d*\,\s\-?\d+\.?\d*e?\+?\-?\d*\)$", outs[i]) is not None:
                 #outputval = -1 * float(outs[-i])
                 outputval = outs[i]
         
         #if np.isnan(outputval):
         #    outputval = 0
     except subprocess.CalledProcessError as e:
         traceback.print_exc()
         print (e.output)
     except:
         print ("Unexpected error:")
         traceback.print_exc()
         print (outs)
         
         #outputval = 0
     #TODO_CHRIS hacky solution
     tuple_str1 = ''
     tuple_str2 = ''
     success = True
     i = 1
     try:
         while outputval[i] != ',':
             tuple_str1 += outputval[i]
             i += 1
         i += 1
         while outputval[i] != ')':
             tuple_str2 += outputval[i]
             i += 1
     except:
         print("error in receiving answer from gpu " + str(gpu_no))
         success = False
     try:
         tuple = (float(tuple_str1),float(tuple_str2),success)
     except:
         tuple = (0.0,0.0,False)
     #return outputval
     return tuple
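
The loop above scans the captured output for a line that looks like a printed 2-tuple; a quick standalone check of that pattern on made-up output lines:

import re

pattern = r"^\(\-?\d+\.?\d*e?\+?\-?\d*\,\s\-?\d+\.?\d*e?\+?\-?\d*\)$"
# Made-up lines as they might appear in the captured stdout.
for line in ('epoch 10 done', '(0.25, 1.3e+02)', '(-3.5, 0.0)'):
    print(repr(line), re.match(pattern, line) is not None)
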
Beispiel #52
0
            return True
        else:
            print 'FAILED'
            print_result(expect_file, result_data, True)
            return False
    except Exception as e:
        print '\nFAILED:', e.message
    return False


if __name__ == '__main__':
    passed = []
    failed = []
    disabled = []

    for mvm, test_dir, test in load_tests():
        if any(re.match(regex, test) for regex in disabled_tests):
            disabled.append(test)
            continue

        if run_test(mvm, test_dir, test):
            passed.append(test)
        else:
            failed.append(test)

    print '\n\n Summary:', len(passed), 'PASSED,', len(failed), 'FAILED,', len(disabled), 'DISABLED'
    print '\nPASSED:', passed
    print '\nFAILED:', failed
    print '\nDISABLED:', disabled
    print '\n\n'
Beispiel #53
0
def is_bucket_specified_in_domain_name(path, headers):
    host = headers.get('host', '')
    return re.match(r'.*s3(\-website)?\.([^\.]+\.)?amazonaws\.com', host)
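
A quick check of the helper with made-up Host headers: virtual-hosted-style requests (bucket in the domain) match, a plain LocalStack endpoint does not.

# Sample headers are made up for illustration.
print(bool(is_bucket_specified_in_domain_name('/key', {'host': 'my-bucket.s3.amazonaws.com'})))            # True
print(bool(is_bucket_specified_in_domain_name('/key', {'host': 'my-bucket.s3.eu-west-1.amazonaws.com'})))  # True
print(bool(is_bucket_specified_in_domain_name('/my-bucket/key', {'host': 'localhost:4566'})))              # False
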
Beispiel #54
0
    def forward_request(self, method, path, data, headers):

        # parse path and query params
        parsed_path = urlparse.urlparse(path)

        # Make sure we use 'localhost' as forward host, to ensure moto uses path style addressing.
        # Note that all S3 clients using LocalStack need to enable path style addressing.
        if 's3.amazonaws.com' not in headers.get('host', ''):
            headers['host'] = 'localhost'

        # check content md5 hash integrity if not a copy request
        if 'Content-MD5' in headers and not self.is_s3_copy_request(headers, path):
            response = check_content_md5(data, headers)
            if response is not None:
                return response

        modified_data = None

        # check bucket name
        bucket_name = get_bucket_name(path, headers)
        if method == 'PUT' and not re.match(BUCKET_NAME_REGEX, bucket_name):
            if len(parsed_path.path) <= 1:
                return error_response('Unable to extract valid bucket name. Please ensure that your AWS SDK is ' +
                    'configured to use path style addressing, or send a valid <Bucket>.s3.amazonaws.com "Host" header',
                    'InvalidBucketName', status_code=400)
            return error_response('The specified bucket is not valid.', 'InvalidBucketName', status_code=400)

        # TODO: For some reason, moto doesn't allow us to put a location constraint on us-east-1
        to_find = to_bytes('<LocationConstraint>us-east-1</LocationConstraint>')
        if data and data.startswith(to_bytes('<')) and to_find in data:
            modified_data = data.replace(to_find, to_bytes(''))

        # If this request contains streaming v4 authentication signatures, strip them from the message
        # Related issue: https://github.com/localstack/localstack/issues/98
        # TODO we should evaluate whether to replace moto s3 with scality/S3:
        # https://github.com/scality/S3/issues/237
        if headers.get('x-amz-content-sha256') == 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD':
            modified_data = strip_chunk_signatures(modified_data or data)
            headers['content-length'] = headers.get('x-amz-decoded-content-length')

        # POST requests to S3 may include a "${filename}" placeholder in the
        # key, which should be replaced with an actual file name before storing.
        if method == 'POST':
            original_data = modified_data or data
            expanded_data = multipart_content.expand_multipart_filename(original_data, headers)
            if expanded_data is not original_data:
                modified_data = expanded_data

        # If no content-type is provided, 'binary/octet-stream' should be used
        # src: https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html
        if method == 'PUT' and not headers.get('content-type'):
            headers['content-type'] = 'binary/octet-stream'

        # persist this API call to disk
        persistence.record('s3', method, path, data, headers)

        # parse query params
        query = parsed_path.query
        path = parsed_path.path
        bucket = path.split('/')[1]
        query_map = urlparse.parse_qs(query, keep_blank_values=True)

        # remap metadata query params (not supported in moto) to request headers
        append_metadata_headers(method, query_map, headers)

        if query == 'notification' or 'notification' in query_map:
            # handle and return response for ?notification request
            response = handle_notification_request(bucket, method, data)
            return response

        if query == 'cors' or 'cors' in query_map:
            if method == 'GET':
                return get_cors(bucket)
            if method == 'PUT':
                return set_cors(bucket, data)
            if method == 'DELETE':
                return delete_cors(bucket)

        if query == 'lifecycle' or 'lifecycle' in query_map:
            if method == 'GET':
                return get_lifecycle(bucket)
            if method == 'PUT':
                return set_lifecycle(bucket, data)

        if query == 'replication' or 'replication' in query_map:
            if method == 'GET':
                return get_replication(bucket)
            if method == 'PUT':
                return set_replication(bucket, data)

        if query == 'encryption' or 'encryption' in query_map:
            if method == 'GET':
                return get_encryption(bucket)
            if method == 'PUT':
                return set_encryption(bucket, data)

        if query == 'object-lock' or 'object-lock' in query_map:
            if method == 'GET':
                return get_object_lock(bucket)
            if method == 'PUT':
                return set_object_lock(bucket, data)

        if modified_data is not None:
            return Request(data=modified_data, headers=headers, method=method)
        return True
Beispiel #55
0
def pm_button_callback(_, __, query):
    if re.match('engine_pm', query.data):
        return True
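
pm_button_callback has the (filter, client, query) signature of a custom callback-query filter; a minimal sketch that exercises it with stand-in query objects:

from types import SimpleNamespace

# Stand-ins for callback-query updates; only the .data attribute is used.
print(pm_button_callback(None, None, SimpleNamespace(data='engine_pm_settings')))  # True
print(pm_button_callback(None, None, SimpleNamespace(data='help_menu')))           # None (no match)
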
    def __init__(self, inputList, exactMatches={}, patternMatches={}):
        defaultArgExactMatches = {

            '-o' : (1, ArgumentListFilter.outputFileCallback),
            '-c' : (0, ArgumentListFilter.compileOnlyCallback),
            '-E' : (0, ArgumentListFilter.preprocessOnlyCallback),
            '-S' : (0, ArgumentListFilter.assembleOnlyCallback),

            '--verbose' : (0, ArgumentListFilter.verboseFlagCallback),
            '--param' : (1, ArgumentListFilter.defaultBinaryCallback),
            '-aux-info' : (1, ArgumentListFilter.defaultBinaryCallback),

            #iam: presumably the len(inputFiles) == 0 in this case
            '--version' : (0, ArgumentListFilter.compileOnlyCallback),
            '-v' : (0, ArgumentListFilter.compileOnlyCallback),

            #warnings (apart from the regex below)
            '-w' : (0, ArgumentListFilter.compileOnlyCallback),
            '-W' : (0, ArgumentListFilter.compileOnlyCallback),


            #iam: if this happens, then we need to stop and think.
            '-emit-llvm' : (0, ArgumentListFilter.emitLLVMCallback),

            #iam: buildworld and buildkernel use these flags
            '-pipe' : (0, ArgumentListFilter.compileUnaryCallback),
            '-undef' : (0, ArgumentListFilter.compileUnaryCallback),
            '-nostdinc' : (0, ArgumentListFilter.compileUnaryCallback),
            '-nostdinc++' : (0, ArgumentListFilter.compileUnaryCallback),
            '-Qunused-arguments' : (0, ArgumentListFilter.compileUnaryCallback),
            '-no-integrated-as' : (0, ArgumentListFilter.compileUnaryCallback),
            '-integrated-as' : (0, ArgumentListFilter.compileUnaryCallback),
            #iam: gcc uses this in both compile and link, but clang only in compile
            '-pthread' : (0, ArgumentListFilter.compileUnaryCallback),
            # I think this is a compiler search path flag.  It is
            # clang only, so I don't think it counts as a separate CPP
            # flag.  Android uses this flag with its clang builds.
            '-nostdlibinc': (0, ArgumentListFilter.compileUnaryCallback),

            #iam: arm stuff
            '-mno-omit-leaf-frame-pointer' : (0, ArgumentListFilter.compileUnaryCallback),
            '-maes' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-aes' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mavx' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-avx' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mcmodel=kernel' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-red-zone' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mmmx' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-mmx' : (0, ArgumentListFilter.compileUnaryCallback),
            '-msse' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-sse2' : (0, ArgumentListFilter.compileUnaryCallback),
            '-msse2' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-sse3' : (0, ArgumentListFilter.compileUnaryCallback),
            '-msse3' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-sse' : (0, ArgumentListFilter.compileUnaryCallback),
            '-msoft-float' : (0, ArgumentListFilter.compileUnaryCallback),
            '-m3dnow' : (0, ArgumentListFilter.compileUnaryCallback),
            '-mno-3dnow' : (0, ArgumentListFilter.compileUnaryCallback),
            '-m32': (0, ArgumentListFilter.compileUnaryCallback),
            '-m64': (0, ArgumentListFilter.compileUnaryCallback),
            '-mstackrealign': (0, ArgumentListFilter.compileUnaryCallback),

            # Preprocessor assertion
            '-A' : (1, ArgumentListFilter.compileBinaryCallback),
            '-D' : (1, ArgumentListFilter.compileBinaryCallback),
            '-U' : (1, ArgumentListFilter.compileBinaryCallback),

            # Dependency generation
            '-M'  : (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MM' : (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MF' : (1, ArgumentListFilter.dependencyBinaryCallback),
            '-MG' : (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MP' : (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MT' : (1, ArgumentListFilter.dependencyBinaryCallback),
            '-MQ' : (1, ArgumentListFilter.dependencyBinaryCallback),
            '-MD' : (0, ArgumentListFilter.dependencyOnlyCallback),
            '-MMD' : (0, ArgumentListFilter.dependencyOnlyCallback),

            # Include
            '-I' : (1, ArgumentListFilter.compileBinaryCallback),
            '-idirafter' : (1, ArgumentListFilter.compileBinaryCallback),
            '-include' : (1, ArgumentListFilter.compileBinaryCallback),
            '-imacros' : (1, ArgumentListFilter.compileBinaryCallback),
            '-iprefix' : (1, ArgumentListFilter.compileBinaryCallback),
            '-iwithprefix' : (1, ArgumentListFilter.compileBinaryCallback),
            '-iwithprefixbefore' : (1, ArgumentListFilter.compileBinaryCallback),
            '-isystem' : (1, ArgumentListFilter.compileBinaryCallback),
            '-isysroot' : (1, ArgumentListFilter.compileBinaryCallback),
            '-iquote' : (1, ArgumentListFilter.compileBinaryCallback),
            '-imultilib' : (1, ArgumentListFilter.compileBinaryCallback),

            # Language
            '-ansi' : (0, ArgumentListFilter.compileUnaryCallback),
            '-pedantic' : (0, ArgumentListFilter.compileUnaryCallback),
            '-x' : (1, ArgumentListFilter.compileBinaryCallback),

            # Debug
            '-g' : (0, ArgumentListFilter.compileUnaryCallback),
            '-g0' : (0, ArgumentListFilter.compileUnaryCallback),     #iam: clang not gcc
            '-ggdb' : (0, ArgumentListFilter.compileUnaryCallback),
            '-ggdb3' : (0, ArgumentListFilter.compileUnaryCallback),
            '-gdwarf-2' : (0, ArgumentListFilter.compileUnaryCallback),
            '-gdwarf-3' : (0, ArgumentListFilter.compileUnaryCallback),
            '-gline-tables-only' : (0, ArgumentListFilter.compileUnaryCallback),

            '-p' : (0, ArgumentListFilter.compileUnaryCallback),
            '-pg' : (0, ArgumentListFilter.compileUnaryCallback),

            # Optimization
            '-O' : (0, ArgumentListFilter.compileUnaryCallback),
            '-O0' : (0, ArgumentListFilter.compileUnaryCallback),
            '-O1' : (0, ArgumentListFilter.compileUnaryCallback),
            '-O2' : (0, ArgumentListFilter.compileUnaryCallback),
            '-O3' : (0, ArgumentListFilter.compileUnaryCallback),
            '-Os' : (0, ArgumentListFilter.compileUnaryCallback),
            '-Ofast' : (0, ArgumentListFilter.compileUnaryCallback),
            '-Og' : (0, ArgumentListFilter.compileUnaryCallback),
            # Component-specifiers
            '-Xclang' : (1, ArgumentListFilter.compileBinaryCallback),
            '-Xpreprocessor' : (1, ArgumentListFilter.defaultBinaryCallback),
            '-Xassembler' : (1, ArgumentListFilter.defaultBinaryCallback),
            '-Xlinker' : (1, ArgumentListFilter.defaultBinaryCallback),
            # Linker
            '-l' : (1, ArgumentListFilter.linkBinaryCallback),
            '-L' : (1, ArgumentListFilter.linkBinaryCallback),
            '-T' : (1, ArgumentListFilter.linkBinaryCallback),
            '-u' : (1, ArgumentListFilter.linkBinaryCallback),
            #iam: specify the entry point
            '-e' : (1, ArgumentListFilter.linkBinaryCallback),
            # runtime library search path
            '-rpath' : (1, ArgumentListFilter.linkBinaryCallback),
            # iam: showed up in buildkernel
            '-shared' : (0, ArgumentListFilter.linkUnaryCallback),
            '-static' : (0, ArgumentListFilter.linkUnaryCallback),
            '-pie' : (0, ArgumentListFilter.linkUnaryCallback),
            '-nostdlib' : (0, ArgumentListFilter.linkUnaryCallback),
            '-nodefaultlibs' : (0, ArgumentListFilter.linkUnaryCallback),
            '-rdynamic' : (0, ArgumentListFilter.linkUnaryCallback),
            # darwin flags
            '-dynamiclib' : (0, ArgumentListFilter.linkUnaryCallback),
            '-current_version' : (1, ArgumentListFilter.linkBinaryCallback),
            '-compatibility_version' : (1, ArgumentListFilter.linkBinaryCallback),

            # dragonegg mystery argument
            '--64' : (0, ArgumentListFilter.compileUnaryCallback),

            # binutils nonsense
            '-print-multi-directory' : (0, ArgumentListFilter.compileUnaryCallback),
            '-print-multi-lib' : (0, ArgumentListFilter.compileUnaryCallback),
            '-print-libgcc-file-name' : (0, ArgumentListFilter.compileUnaryCallback),

            # Code coverage instrumentation
            '-fprofile-arcs' : (0, ArgumentListFilter.compileLinkUnaryCallback),
            '-coverage' : (0, ArgumentListFilter.compileLinkUnaryCallback),
            '--coverage' : (0, ArgumentListFilter.compileLinkUnaryCallback),

            #
            # BD: need to warn the darwin user that these flags will rain on their parade
            # (the Darwin ld is a bit single minded)
            #
            # 1) compilation with -fvisibility=hidden causes trouble when we try to
            #    attach bitcode filenames to an object file. The global symbols in object
            #    files get turned into local symbols when we invoke 'ld -r'
            #
            # 2) all stripping commands (e.g., -dead_strip) remove the __LLVM segment after
            #    linking
            #
            # Update: found a fix for problem 1: add flag -keep_private_externs when
            # calling ld -r.
            #
            '-Wl,-dead_strip' :  (0, ArgumentListFilter.darwinWarningLinkUnaryCallback),

        }

        #
        # Patterns for other command-line arguments:
        # - inputFiles
        # - objectFiles (suffix .o)
        # - libraries + linker options as in -lxxx -Lpath or -Wl,xxxx
        # - preprocessor options as in -DXXX -Ipath
        # - compiler warning options: -W....
        # - optimization and other flags: -f...
        #
        defaultArgPatterns = {
            r'^.+\.(c|cc|cpp|C|cxx|i|s|S|bc)$' : (0, ArgumentListFilter.inputFileCallback),
            # FORTRAN file types
            r'^.+\.([fF](|[0-9][0-9]|or|OR|pp|PP))$' : (0, ArgumentListFilter.inputFileCallback),
            #iam: the object file recognition is not really very robust, object files
            # should be determined by their existence and contents...
            r'^.+\.(o|lo|So|so|po|a|dylib)$' : (0, ArgumentListFilter.objectFileCallback),
            #iam: library.so.4.5.6 probably needs a similar pattern for .dylib too.
            r'^.+\.dylib(\.\d)+$' : (0, ArgumentListFilter.objectFileCallback),
            r'^.+\.(So|so)(\.\d)+$' : (0, ArgumentListFilter.objectFileCallback),
            r'^-(l|L).+$' : (0, ArgumentListFilter.linkUnaryCallback),
            r'^-I.+$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^-D.+$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^-U.+$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^-Wl,.+$' : (0, ArgumentListFilter.linkUnaryCallback),
            r'^-W(?!l,).*$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^-f.+$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^-rtlib=.+$' : (0, ArgumentListFilter.linkUnaryCallback),
            r'^-std=.+$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^-stdlib=.+$' : (0, ArgumentListFilter.compileLinkUnaryCallback),
            r'^-mtune=.+$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^--sysroot=.+$' :  (0, ArgumentListFilter.compileUnaryCallback),
            r'^-print-prog-name=.*$' : (0, ArgumentListFilter.compileUnaryCallback),
            r'^-print-file-name=.*$' : (0, ArgumentListFilter.compileUnaryCallback),

        }

        #iam: try and keep track of the files, input object, and output
        self.inputList = inputList
        self.inputFiles = []
        self.objectFiles = []
        self.outputFilename = None

        #iam: try and split the args into linker and compiler switches
        self.compileArgs = []
        self.linkArgs = []


        self.isVerbose = False
        self.isDependencyOnly = False
        self.isPreprocessOnly = False
        self.isAssembleOnly = False
        self.isAssembly = False
        self.isCompileOnly = False
        self.isEmitLLVM = False


        argExactMatches = dict(defaultArgExactMatches)
        argExactMatches.update(exactMatches)
        argPatterns = dict(defaultArgPatterns)
        argPatterns.update(patternMatches)

        self._inputArgs = collections.deque(inputList)

        #iam: parse the cmd line, bailing if we discover that there will be no second phase.
        while (self._inputArgs   and
               not (self.isAssembly or
                    self.isAssembleOnly or
                    self.isPreprocessOnly)):
            # Get the next argument
            currentItem = self._inputArgs.popleft()
            _logger.debug('Trying to match item ' + currentItem)
            # First, see if this exact flag has a handler in the table.
            # This is a cheap test.  Otherwise, see if the input matches
            # some pattern with a handler that we recognize
            if currentItem in argExactMatches:
                (arity, handler) = argExactMatches[currentItem]
                flagArgs = self._shiftArgs(arity)
                handler(self, currentItem, *flagArgs)
            else:
                matched = False
                for pattern, (arity, handler) in argPatterns.items():
                    if re.match(pattern, currentItem):
                        flagArgs = self._shiftArgs(arity)
                        handler(self, currentItem, *flagArgs)
                        matched = True
                        break
                # If no action has been specified, this is a zero-argument
                # flag that we should just keep.
                if not matched:
                    _logger.warning('Did not recognize the compiler flag "%s"', currentItem)
                    self.compileUnaryCallback(currentItem)

        if DUMPING:
            self.dump()
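
A usage sketch for the constructor above, assuming the callback methods referenced in the tables (inputFileCallback, outputFileCallback, compileOnlyCallback, and so on) populate the attributes their names suggest:

# Hypothetical compiler invocation, split the way a build system would pass it.
cmd = ['-c', '-O2', '-Iinclude', '-DNDEBUG', 'foo.c', '-o', 'foo.o']
alf = ArgumentListFilter(cmd)
print(alf.inputFiles)      # expected: ['foo.c']
print(alf.outputFilename)  # expected: 'foo.o'
print(alf.isCompileOnly)   # expected: True
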
Beispiel #57
0
lines_out = []

lines_out.append('# Laboratory work No. ' + sys.argv[2])

code_flag = False

for i in lines:
    s = i
    if i.strip() == '':
        continue
    elif s.strip()[0] == '#':
        if code_flag:
            lines_out.append('```')
            code_flag = False
        if re.match(r'^#[0-9]+$', s.strip()) is None:
            lines_out.append(s.strip()[1:])
        else:
            lines_out.append('## ' + s.strip()[1:])
    elif re.match(r'^.*#[0-9]+$', s) is None:
        if not code_flag:
            lines_out.append('```shell')
            code_flag = True
        lines_out.append(i)
    else:
        s = s.split('#')
        if code_flag:
            lines_out.append('```')
            code_flag = False
        lines_out.append('## ' + s[-1].strip())
        lines_out.append('```shell')
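
The two regexes above decide whether a line is a bare section marker like "#2" or a command annotated with a trailing "#N"; a quick check on made-up lines:

import re

# Made-up sample lines from an annotated command log.
for line in ('#2', '# just a comment', 'ls -la   #2', 'ls -la'):
    is_marker = re.match(r'^#[0-9]+$', line.strip()) is not None
    has_trailing_marker = re.match(r'^.*#[0-9]+$', line) is not None
    print(repr(line), is_marker, has_trailing_marker)
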
Beispiel #58
0
def main(meshfile,file,iexpt=10,iversn=22,yrflag=3,bio_path=None) :
    
    #
    # Trim the input netcdf file name so it is appropriate for reading
    #
    meshfile=str(meshfile)[2:-2]
    logger.info("Reading mesh information from %s."%(meshfile))
    #
    # Read mesh file containing grid and coordinate information.
    # Note that for now, we are using T-grid in vertical which may need
    # to be improved by utilizing W-point along the vertical axis.
    #
    hdept,gdept,mbathy,mbathy_u,mbathy_v,mask,e3t,plon,plat=read_grid(meshfile)
    logger.warning("Reading grid information from regional.grid.[ab] (not completed)")
    #
    # Convert from P-point (i.e. NEMO grid) to U and V HYCOM grids
    #
    mask_u=p2u_2d(mask)
    mask_v=p2v_2d(mask)
    #
    # Read regional.grid.[ab]
    # Grid angle is not used for this product because all quantities are
    # on regular rectangular grid points.
    #
    angle=numpy.zeros(plon.shape)
    #
    # Number vertical layers in T-point.
    #
    nlev=gdept.size
    #
    # layer thickness in the absence of layer partial steps.
    #
    dt = gdept[1:] - gdept[:-1]
    #
    # Prepare/read input data file (in netcdf format). Reference time is 1950-01-01
    #
    logger.info("Reading data files.")
    file=str(file).strip()[2:-2]
    dirname=os.path.dirname(file)
    logger.debug("file name is {}".format(file))
    logger.debug("dirname is {}".format(dirname))
    logger.debug("basename is {}".format(os.path.basename(file)))
    file_pre="MERCATOR-PHY-24-"
    m=re.match(r"("+file_pre+r")(.*\.nc)",os.path.basename(file))
    logger.debug("file prefix is {}".format(file_pre))
###    m=re.match(file_pre,os.path.basename(file))
    if not m:
        msg="File %s is not a grid2D file, aborting"%file
        logger.error(msg)
        raise ValueError(msg)
    
    #fileinput0=os.path.join(dirname+"/"+"MERCATOR-PHY-24-"+m.group(2))
    file_date=file[-16:-6]
    fileinput0=file
    print((file_date,file))
    next_day=datetime.datetime.strptime(file_date, '%Y-%m-%d')+datetime.timedelta(days=1)
    fileinput1=datetime.datetime.strftime(next_day,'%Y%m%d')
    fileinput1=os.path.join(dirname+"/"+file_pre+fileinput1+'.nc')
    
    logger.info("Reading from %s"%(fileinput0))
    ncid0=netCDF4.Dataset(fileinput0,"r")
    if timeavg_method==1 and os.path.isfile(fileinput1) :
        
        logger.info("timeavg_method=1, Reading from %s"%(fileinput1))
        ncid1=netCDF4.Dataset(fileinput1,"r")
        #
        # Calculate temporal averaged temperature, salinity, and velocity
        #
        uo =   0.5*(ncid0.variables["uo"][0,:,:,:]+    ncid1.variables["uo"][0,:,:,:])
        vo =   0.5*(ncid0.variables["vo"][0,:,:,:]+    ncid1.variables["vo"][0,:,:,:])
        salt = 0.5*(ncid0.variables["so"][0,:,:,:]+    ncid1.variables["so"][0,:,:,:])
        temp = 0.5*(ncid0.variables["thetao"][0,:,:,:]+ncid1.variables["thetao"][0,:,:,:])
        ssh = numpy.squeeze(0.5*(ncid0.variables["zos"][0,:,:]+ncid1.variables["zos"][0,:,:]))
    
    else:
        #
        # Set variables based on the current file when timeavg_method != 1 or the next netcdf file is not available
        logger.debug("time average method set to {}".format(timeavg_method))
        uo =   ncid0.variables["uo"][0,:,:,:]
        vo =   ncid0.variables["vo"][0,:,:,:]
        salt = ncid0.variables["so"][0,:,:,:]
        temp = ncid0.variables["thetao"][0,:,:,:]
        ssh = numpy.squeeze(ncid0.variables["zos"][0,:,:])
    #
    # These values will be accounted for afterward, because the current version handles missing values with a gap-filling methodology.
    #	
    logger.debug("getting _FillValue")
    uofill=ncid0.variables["uo"]._FillValue
    vofill=ncid0.variables["vo"]._FillValue
    slfill=ncid0.variables["so"]._FillValue
    tlfill=ncid0.variables["thetao"]._FillValue
    shfill=ncid0.variables["zos"]._FillValue

    # Set time
    logger.info("Set time.")
    time=ncid0.variables["time"][0]
    unit=ncid0.variables["time"].units
    tmp=cfunits.Units(unit)
    refy,refm,refd=(1950,1,1)
    tmp2=cfunits.Units("hours since %d-%d-%d 00:00:00"%(refy,refm,refd))
    tmp3=int(numpy.round(cfunits.Units.conform(time,tmp,tmp2)))
    mydt = datetime.datetime(refy,refm,refd,0,0,0) + datetime.timedelta(hours=tmp3) # Then calculate dt. Phew!

    if timeavg_method==1 and os.path.isfile(fileinput1)  :
        fnametemplate="archv.%Y_%j_%H"
        deltat=datetime.datetime(refy,refm,refd,0,0,0) + \
              datetime.timedelta(hours=tmp3) + \
              datetime.timedelta(hours=12)
        oname=deltat.strftime(fnametemplate)
    else:
        #
        # I am assuming that daily mean can be set at 00 instead of 12
        # for cases that there is no information of next day.
        #
        fnametemplate="archv.%Y_%j"
        deltat=datetime.datetime(refy,refm,refd,0,0,0) + \
              datetime.timedelta(hours=tmp3)
        oname=deltat.strftime(fnametemplate) + '_00'

    # model day
    refy, refm, refd=(1900,12,31)
    model_day= deltat-datetime.datetime(refy,refm,refd,0,0,0)
    model_day=model_day.days
    logger.info("Model day in HYCOM is %s"%str(model_day))
    if bio_path:
       jdm,idm=numpy.shape(plon)
       points = numpy.transpose(((plat.flatten(),plon.flatten())))
       delta = mydt.strftime( '%Y-%m-%d')
       # filename format MERCATOR-BIO-14-2013-01-05-00
       print((bio_path,delta))
       idx,biofname=search_biofile(bio_path,delta)
       if idx >7: 
          msg="No available BIO file within a week difference with PHY"
          logger.error(msg)
          raise ValueError(msg)
       logger.info("BIO file %s reading & interpolating to 1/12 deg grid cells ..."%biofname)
       ncidb=netCDF4.Dataset(biofname,"r")
       blon=ncidb.variables["longitude"][:];
       blat=ncidb.variables["latitude"][:]
       minblat=blat.min()
       no3=ncidb.variables["NO3"][0,:,:,:];
       no3[numpy.abs(no3)>1e+10]=numpy.nan
       po4=ncidb.variables["PO4"][0,:,:,:]
       si=ncidb.variables["Si"][0,:,:,:]
       po4[numpy.abs(po4)>1e+10]=numpy.nan
       si[numpy.abs(si)>1e+10]=numpy.nan
       # TODO: I need to improve this part
       nz,ny,nx=no3.shape
       dummy=numpy.zeros((nz,ny,nx+1))
       dummy[:,:,:nx]=no3;dummy[:,:,-1]=no3[:,:,-1]
       no3=dummy
       dummy=numpy.zeros((nz,ny,nx+1))
       dummy[:,:,:nx]=po4;dummy[:,:,-1]=po4[:,:,-1]
       po4=dummy
       dummy=numpy.zeros((nz,ny,nx+1))
       dummy[:,:,:nx]=si;dummy[:,:,-1]=si[:,:,-1]
       si=dummy
       dummy=numpy.zeros((nx+1))
       dummy[:nx]=blon
       blon=dummy
       blon[-1]=-blon[0]
# TODO:  Note that the coordinate files are for the global configuration while
#        the data file is saved for latitudes larger than 30. If you change your data file's coordinate
#        configuration, you need to modify the following lines
       bio_coordfile=bio_path[:-4]+"/GLOBAL_ANALYSIS_FORECAST_BIO_001_014_COORD/GLO-MFC_001_014_mask.nc"
       biocrd=netCDF4.Dataset(bio_coordfile,"r")
       blat2 = biocrd.variables['latitude'][:]
       index=numpy.where(blat2>=minblat)[0]
       depth_lev = biocrd.variables['deptho_lev'][index[0]:,:]
#
#
#
       dummy=numpy.zeros((ny,nx+1))
       dummy[:,:nx]=depth_lev;dummy[:,-1]=depth_lev[:,-1]
       depth_lev=dummy
       depth_lev[depth_lev>50]=0
       depth_lev=depth_lev.astype('i')
       dummy_no3=no3
       dummy_po4=po4
       dummy_si=si
       for j in range(ny):
          for i in range(nx):
             dummy_no3[depth_lev[j,i]:nz-2,j,i]=no3[depth_lev[j,i]-1,j,i]
             dummy_po4[depth_lev[j,i]:nz-2,j,i]=po4[depth_lev[j,i]-1,j,i]
             dummy_si[depth_lev[j,i]:nz-2,j,i]=si[depth_lev[j,i]-1,j,i]
       no3=dummy_no3
       po4=dummy_po4
       si=dummy_si

#
       po4 = po4 * 106.0 * 12.01
       si = si   * 6.625 * 12.01
       no3 = no3 * 6.625 * 12.01


    logger.info("Read, trim, rotate NEMO velocities.")
    u=numpy.zeros((nlev,mbathy.shape[0],mbathy.shape[1]))
    v=numpy.zeros((nlev,mbathy.shape[0],mbathy.shape[1]))
    utmp=numpy.zeros((mbathy.shape))
    vtmp=numpy.zeros((mbathy.shape))
    #
    # Matrices to carefully detect the bottom at p-, u-, and v-grid points. While I have used 3D mask data, the following methods are good enough for now.
    #
    if mbathy_method  ==  1 :
        ip = mbathy   == -1
        iu = mbathy_u == -1
        iv = mbathy_v == -1
    else:
        ip = mask   == 0
        iu = mask_u == 0
        iv = mask_v == 0
    #
    # Read 3D velocity field to calculate barotropic velocity
    #
    # Estimate barotropic velocities using partial steps along the vertical axis. Note that in an early version of this code,
    # I used dt = gdept[1:] - gdept[:-1] on the NEMO t-grid. This part may be re-calculated on vertical grid cells in the future.
    #
    logger.info("Calculate barotropic velocities.")
    ubaro,vbaro=calc_uvbaro(uo,vo,e3t,iu,iv)
    #
    # Save 2D fields (here only ubaro & vbaro)
    #
    zeros=numpy.zeros(mbathy.shape)
    #flnm = open(oname+'.txt', 'w')
    #flnm.write(oname)
    #flnm.close()
    ssh = numpy.where(numpy.abs(ssh)>1000,0.,ssh*9.81) # NB: HYCOM srfhgt is in geopotential ...
    #
    outfile = abf.ABFileArchv("./data/"+oname,"w",iexpt=iexpt,iversn=iversn,yrflag=yrflag,)
    outfile.write_field(zeros,                   ip,"montg1"  ,0,model_day,1,0)
    outfile.write_field(ssh,                     ip,"srfhgt"  ,0,model_day,0,0)
    outfile.write_field(zeros,                   ip,"surflx"  ,0,model_day,0,0) # Not used
    outfile.write_field(zeros,                   ip,"salflx"  ,0,model_day,0,0) # Not used
    outfile.write_field(zeros,                   ip,"bl_dpth" ,0,model_day,0,0) # Not used
    outfile.write_field(zeros,                   ip,"mix_dpth",0,model_day,0,0) # Not used
    outfile.write_field(ubaro,                   iu,"u_btrop" ,0,model_day,0,0)
    outfile.write_field(vbaro,                   iv,"v_btrop" ,0,model_day,0,0)
    #
    if bio_path:
       logger.info("Calculate baroclinic velocities, temperature, and salinity data as well as BIO field.")
    else:
       logger.info("Calculate baroclinic velocities, temperature, and salinity data.")
    for k in numpy.arange(u.shape[0]) :
        if bio_path:
           no3k=interpolate2d(blat, blon, no3[k,:,:], points).reshape((jdm,idm))
           no3k = maplev(no3k)
           po4k=interpolate2d(blat, blon, po4[k,:,:], points).reshape((jdm,idm))
           po4k = maplev(po4k)
           si_k=interpolate2d(blat, blon, si[k,:,:], points).reshape((jdm,idm))
           si_k = maplev(si_k)
           if k%10==0 : logger.info("Writing 3D variables including BIO, level %d of %d"%(k+1,u.shape[0]))
        else:
           if k%10==0 : logger.info("Writing 3D variables, level %d of %d"%(k+1,u.shape[0]))
        #

        #
        uo[k,:,:]=numpy.where(numpy.abs(uo[k,:,:])<10,uo[k,:,:],0)
        vo[k,:,:]=numpy.where(numpy.abs(vo[k,:,:])<10,vo[k,:,:],0)

        # Baroclinic velocity (in HYCOM U- and V-grid)
        ul = p2u_2d(numpy.squeeze(uo[k,:,:])) - ubaro
        vl = p2v_2d(numpy.squeeze(vo[k,:,:])) - vbaro
        ul[iu]=spval
        vl[iv]=spval
        
        # Layer thickness
        
        dtl=numpy.zeros(mbathy.shape)
        # Use dt for the water column except the nearest cell to bottom 
        if thickness_method==1:
            if k < u.shape[0]-1 :
                J,I = numpy.where(mbathy>k)
                e3=(e3t[k,:,:])
                dtl[J,I]=dt[k]
                J,I = numpy.where(mbathy==k)
                dtl[J,I]=e3[J,I]
            else:
                e3=(e3t[k,:,:])
                J,I = numpy.where(mbathy==k)
                dtl[J,I]=e3[J,I]
        # Use partial cells for the whole water column.
        else :
            J,I = numpy.where(mbathy>=k)
            dtl[J,I]=e3t[k,J,I]

        # Salinity
        sl = salt[k,:,:]

        # Temperature
        tl = temp[k,:,:]
        # Need to be carefully treated in order to minimize artifacts to the resulting [ab] files.
        if fillgap_method==1:
            J,I= numpy.where(mbathy<k)
            sl = maplev(numpy.where(numpy.abs(sl)<1e2,sl,numpy.nan))
            sl[J,I]=spval
            J,I= numpy.where(mbathy<k)
            tl = maplev(numpy.where(numpy.abs(tl)<1e2,tl,numpy.nan))
            tl[J,I]=spval
        else:
            sl = numpy.where(numpy.abs(sl)<1e2,sl,numpy.nan)
            sl = numpy.minimum(numpy.maximum(maplev(sl),25),80.)
            tl = numpy.where(numpy.abs(tl)<=5e2,tl,numpy.nan)
            tl = numpy.minimum(numpy.maximum(maplev(tl),-5.),50.)

        # Thickness
        dtl = maplev(dtl)
        if k > 0 :
            with numpy.errstate(invalid='ignore'):
                K= numpy.where(dtl < 1e-4)
            sl[K] = sl_above[K]
            tl[K] = tl_above[K]
        #
        sl[ip]=spval
        tl[ip]=spval

        # Save 3D fields
        outfile.write_field(ul      ,iu,"u-vel.",0,model_day,k+1,0)
        outfile.write_field(vl      ,iv,"v-vel.",0,model_day,k+1,0)
        outfile.write_field(dtl*onem,ip,"thknss",0,model_day,k+1,0)
        outfile.write_field(tl      ,ip,"temp" , 0,model_day,k+1,0)
        outfile.write_field(sl      ,ip,"salin" ,0,model_day,k+1,0)
        if bio_path :
           outfile.write_field(no3k      ,ip,"ECO_no3" ,0,model_day,k+1,0)
           outfile.write_field(po4k      ,ip,"ECO_pho" ,0,model_day,k+1,0)
           outfile.write_field(si_k      ,ip,"ECO_sil" ,0,model_day,k+1,0)
                
        tl_above=numpy.copy(tl)
        sl_above=numpy.copy(sl)
    
    outfile.close()
    ncid0.close()
    if timeavg_method==1 and os.path.isfile(fileinput1)  :
        ncid1.close()
    if bio_path :
       ncidb.close()
Beispiel #59
0
def is_agent_path(path):
    path = os.path.basename(path)
    return re.match(AGENT_NAME_PATTERN, path) is not None
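
A small sketch of the helper; AGENT_NAME_PATTERN comes from elsewhere in the original module, so a hypothetical value is used here just to exercise the basename matching:

AGENT_NAME_PATTERN = r'^WALinuxAgent-\d+\.\d+\.\d+'  # hypothetical value, for illustration only
print(is_agent_path('/var/lib/waagent/WALinuxAgent-2.2.45'))  # True
print(is_agent_path('/var/lib/waagent/custom-script-1.0'))    # False
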
def lookup_by_isbn(number, forceUpdate=False):
    isbn, price = _process_isbn(number)
    print("Looking up isbn", isbn, "with price", price)

    # if length of isbn>0 and isn't "n/a" or "none"
    if len(isbn) > 0 and not re.match(r"^n(\s|/){0,1}a|none", isbn, re.I):
        # first we check our database
        titles = Title.select(Title.q.isbn == isbn)
        ##print titles #debug
        known_title = False
        the_titles = list(titles)
        if (len(the_titles) > 0) and (not forceUpdate):
            ##print "in titles"
            known_title = the_titles[0]
            ProductName = the_titles[0].booktitle.format()
            authors = []
            if len(the_titles[0].author) > 0:
                authors = [x.authorName.format() for x in the_titles[0].author]
            authors_as_string = ", ".join(authors)
            categories = []
            if len(the_titles[0].categorys) > 0:
                ##print len(the_titles[0].categorys)
                ##print the_titles[0].categorys
                categories = [x.categoryName.format() for x in the_titles[0].categorys]
            categories_as_string = ", ".join(categories)
            if price == 0:
                if len(the_titles[0].books) > 0:
                    ListPrice = max([x.listprice for x in the_titles[0].books])
                else:
                    ListPrice = 0
            else:
                ListPrice = price
            Manufacturer = the_titles[0].publisher.format()
            Format = the_titles[0].type.format()
            Kind = the_titles[0].kind.kindName
            orig_isbn = the_titles[0].origIsbn.format()
            #            if the_titles[0].images:
            #                 large_url = the_titles[0].images.largeUrl
            #                 med_url = the_titles[0].images.medUrl
            #                 small_url = the_titles[0].images.smallUrl
            #            else:
            #                 large_url = med_url = small_url = ''
            large_url = med_url = small_url = ""

            SpecialOrders = [
                tso.id
                for tso in Title.selectBy(
                    isbn=isbn
                ).throughTo.specialorder_pivots.filter(
                    TitleSpecialOrder.q.orderStatus == "ON ORDER"
                )
            ]
            return {
                "title": ProductName,
                "authors": authors,
                "authors_as_string": authors_as_string,
                "categories_as_string": categories_as_string,
                "list_price": ListPrice,
                "publisher": Manufacturer,
                "isbn": isbn,
                "orig_isbn": orig_isbn,
                "large_url": large_url,
                "med_url": med_url,
                "small_url": small_url,
                "format": Format,
                "kind": Kind,
                "known_title": known_title,
                "special_order_pivots": SpecialOrders,
            }
        else:  # we don't have it yet
            # if we're using amazon ecs
            if use_amazon_ecs:
                sleep(1)  # so amazon doesn't get huffy
                ecs.setLicenseKey(amazon_license_key)
                ecs.setSecretAccessKey(amazon_secret_key)
                ecs.setAssociateTag(amazon_associate_tag)

                ##print "about to search", isbn, isbn[0]
                amazonBooks = []

                idType = ""
                if len(isbn) == 12:
                    idType = "UPC"
                elif len(isbn) == 13:
                    # if we are using an internal isbn
                    if isbn.startswith(internal_isbn_prefix):
                        return []
                    # otherwise search on amazon.
                    elif isbn.startswith("978") or isbn.startswith("979"):
                        idType = "ISBN"
                    else:
                        idType = "EAN"
                try:
                    print("searching amazon for ", isbn, idType, file=sys.stderr)
                    amazonProds = AmzSear(isbn)
                    print(amazonProds, file=sys.stderr)
                except (ecs.InvalidParameterValue, HTTPError):
                    pass
                if amazonProds:
                    print(amazonProds, file=sys.stderr)
                    # the inner comprehension tests each product for a price whose type is in format_list;
                    # if we find a price whose key is in format_list, we return the corresponding product
                    format_list = [
                        "Paperback",
                        "Mass Market Paperback",
                        "Hardcover",
                        "Perfect Paperback",
                        "Pamphlet",
                        "Plastic Comb",
                        "Spiral-bound",
                        "Print on Demand (Paperback)",
                        "DVD",
                        "Calendar",
                        "Board book",
                        "Audio Cassette",
                        "Cards",
                        "Audio CD",
                        "Diary",
                        "DVD-ROM",
                        "Library Binding",
                        "music",
                        "Vinyl",
                        "Health and Beauty",
                        "Hardback",
                    ]
                    prods = [
                        x
                        for x in amazonProds.values()
                        if [dum for dum in x["prices"].keys() if dum in format_list]
                    ]

                    for prod1 in prods:
                        print(prod1, file=sys.stderr)
                        price_dict = prod1["prices"]
                        listprice = max(price_dict.values())

                        format = [k for k in format_list if k in price_dict]
                        if not format:
                            continue
                        format = format[0]

                        title = prod1["title"]

                        image_url = prod1["image_url"]

                        authors = [
                            x.replace("by ", "")
                            for x in prod1["subtext"]
                            if x.startswith("by ")
                        ]
                        auth_list = [
                            y.strip()
                            for a in [x.split(", ") for x in authors[0].split(" and ")]
                            for y in a
                        ]
                        # we assume any full name less than five characters is an abbreviation like 'Jr.'
                        # so we add it back to the previous authorname
                        abbrev_list = [i for i, x in enumerate(auth_list) if len(x) < 5]
                        for i in abbrev_list:
                            auth_list[i - 1 : i + 1] = [
                                ", ".join(auth_list[i - 1 : i + 1])
                            ]

                        return {
                            "title": title,
                            "authors": auth_list,
                            "authors_as_string": ",".join(auth_list),
                            "categories_as_string": "",
                            "list_price": listprice,
                            "publisher": "",
                            "isbn": isbn,
                            "orig_isbn": isbn,
                            "large_url": image_url,
                            "med_url": image_url,
                            "small_url": image_url,
                            "format": format,
                            "kind": "books",
                            "known_title": known_title,
                            "special_orders": [],
                        }

                else:
                    traceback.print_exc()
                    print("using isbnlib from ecs", file=sys.stderr)
                    isbnlibbooks = []
                    try:
                        isbnlibbooks = isbnlib.meta(str(isbn))
                    except:
                        pass

                    if isbnlibbooks:
                        return {
                            "title": isbnlibbooks["Title"],
                            "authors": isbnlibbooks["Authors"],
                            "authors_as_string": ",".join(isbnlibbooks["Authors"]),
                            "categories_as_string": None,
                            "list_price": price,
                            "publisher": isbnlibbooks["Publisher"],
                            "isbn": isbn,
                            "orig_isbn": isbn,
                            "large_url": None,
                            "med_url": None,
                            "small_url": None,
                            "format": None,
                            "kind": "books",
                            "known_title": known_title,
                            "special_orders": [],
                        }
                    else:
                        return {}
            else:  # if we're scraping amazon
                print("scraping amazon", file=sys.stderr)
                headers = {
                    "User-Agent": random.sample(user_agents, 1).pop()
                }
                amazon_url_template = "http://www.amazon.com/dp/%s/"
                if len(isbn) == 13:
                    isbn10 = None
                    if isbnlib.is_isbn13(isbn):
                        isbn10 = isbnlib.to_isbn10(isbn)
                    else:
                        return {}
                if isbn10:
                    with requests.Session() as session:
                        try:
                            print("getting amazon")
                            page_response = session.get(
                                amazon_url_template % isbn10,
                                headers=headers,
                                timeout=0.1
                            )
                            print("got response")
                            page_content = BeautifulSoup(page_response.content, "lxml")
                            print("got parsed content")
                            try:
                                booktitle = page_content.select("#productTitle").pop().text
                            except Exception as e:
                                traceback.print_exc()
                                booktitle = ''
                            popover_preload = [
                                a.text
                                for a in page_content.select(
                                    ".author.notFaded .a-popover-preload a.a-link-normal"
                                )
                            ]
                            author_name = [
                                a.text
                                for a in page_content.select(
                                    ".author.notFaded a.a-link-normal"
                                )
                                if a.text not in popover_preload
                            ]
                            contributor_role = page_content.select(".contribution span")
                            try:
                                contributor_role = [
                                    re.findall("\w+", cr.text).pop()
                                    for cr in contributor_role
                                ]
                            except Exception as e:
                                traceback.print_exc()
                                contributor_role = []
                            author_role = zip(author_name, contributor_role)
                            try:
                                listprice = (
                                    page_content.select(".a-text-strike").pop().text
                                )
                            except IndexError as e:
                                print("using bookfinder4u")
                                if "listprice" not in locals():
                                    with requests.Session() as session:
                                        bookfinderurl = "http://www.bookfinder4u.com/IsbnSearch.aspx?isbn='%s'&mode=direct"
                                        url = bookfinderurl % isbn
                                        try:
                                            page_response2 = session.get(
                                                url,
                                                headers=headers,
                                                timeout=0.1
                                            )
                                            page_content2 = BeautifulSoup(
                                                page_response2.content, "lxml"
                                            )
                                        except Exception as e:
                                            traceback.print_exc()
                                            listprice = 0.0
                                        else:
                                            try:
                                                matches = re.search(
                                                    "List\sprice:\s(\w{2,4})\s(\d+(.\d+)?)",
                                                    page_content2.text,
                                                    re.I,
                                                )
                                                if matches:
                                                    listprice = matches.groups()[1]
                                                else:
                                                    listprice = 0.00
                                            except Exception as e:
                                                traceback.print_exc()
                                                listprice = 0.00
                            try:
                                book_edition = (
                                    page_content.select("#bookEdition").pop().text
                                )
                            except Exception as e:
                                traceback.print_exc()
                                book_edition = ""
                            try:
                                matches = re.findall(
                                    "(?<=imageGalleryData'\s:\s\[)\{.*?\}",
                                    page_content.contents[1].text,
                                )
                                image_url_dict = eval(matches[0])
                            except Exception as e:
                                traceback.print_exc()
                                image_url_dict = {"mainUrl": "", "thumbUrl": ""}
                            category_items = page_content.select(".zg_hrsr_ladder a")
                            category_items = [a.text for a in category_items]
                            product_details = page_content.select(
                                "#productDetailsTable"
                            )  # ul:first-of-type")
                            try:
                                product_details1 = product_details.pop().text.splitlines()
                                # defaults in case either detail line is missing
                                publisher = ""
                                format = ""
                                quit_flag = 0
                                for pd in product_details1:
                                    if pd.endswith("pages"):
                                        format, numpages = pd.split(":")
                                        numpages = numpages.replace(" pages", "").strip()
                                        quit_flag += 1
                                    elif pd.startswith("Publisher: "):
                                        matches = re.match(
                                            r"Publisher: ([^;^(]*)\s?([^(]*)?\W(.*)\W", pd
                                        ).groups()
                                        publisher = matches[0]
                                        publication_date = matches[2]
                                        quit_flag += 1
                                    if quit_flag == 2:
                                        break
                            except Exception as e:
                                traceback.print_exc()
                                publisher = ""
                                format = ""
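                            # Result assembled entirely from the scraped product page.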
                            if booktitle:
                                return {
                                    "title": booktitle,
                                    "authors": author_name,
                                    "authors_as_string": ",".join(author_name),
                                    "categories_as_string": ",".join(category_items),
                                    "list_price": listprice,
                                    "publisher": publisher,
                                    "isbn": isbn,
                                    "orig_isbn": isbn,
                                    "large_url": image_url_dict["mainUrl"],
                                    "med_url": image_url_dict["mainUrl"],
                                    "small_url": image_url_dict["thumbUrl"],
                                    "format": format,
                                    "kind": "books",
                                    "known_title": known_title,
                                    "special_orders": [],
                                }
                        except Exception as e:
                            traceback.print_exc()
                            print("using isbnlib from scraper", file=sys.stderr)
                            isbnlibbooks = []
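                            # Fallback: isbnlib.meta() returns a metadata dict (Title, Authors,
                            # Publisher, ...) for the ISBN, or raises if the lookup fails.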
                            try:
                                isbnlibbooks = isbnlib.meta(str(isbn))
                            except Exception:
                                pass

                            if isbnlibbooks:
                                return {
                                    "title": isbnlibbooks["Title"],
                                    "authors": isbnlibbooks["Authors"],
                                    "authors_as_string": ",".join(
                                        isbnlibbooks["Authors"]
                                    ),
                                    "categories_as_string": None,
                                    "list_price": price,
                                    "publisher": isbnlibbooks["Publisher"],
                                    "isbn": isbn,
                                    "orig_isbn": isbn,
                                    "large_url": None,
                                    "med_url": None,
                                    "small_url": None,
                                    "format": None,
                                    "kind": "books",
                                    "known_title": known_title,
                                    "special_orders": [],
                                }
                            else:
                                return {}
                else:
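                    # This branch reuses title, author_name, image_url_dict, etc., which
                    # were populated earlier in the function by the previous lookup path.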
                    if title:
                        return {
                            "title": title,
                            "authors": author_name,
                            "authors_as_string": ",".join(author_name),
                            "categories_as_string": ",".join(category_items),
                            "list_price": listprice,
                            "publisher": publisher,
                            "isbn": isbn,
                            "orig_isbn": isbn,
                            "large_url": image_url_dict["mainUrl"],
                            "med_url": image_url_dict["mainUrl"],
                            "small_url": image_url_dict["thumbUrl"],
                            "format": format,
                            "kind": "books",
                            "known_title": known_title,
                            "special_orders": [],
                        }
                    else:
                        return {}
    else:
        return {}