def replace_links_with_text(html):
    """any absolute links will be replaced with the
    url in plain text, same with any img tags
    """
    soup = BeautifulSoup(html, 'html5lib')
    abs_url_re = r'^http(s)?://'

    images = soup.find_all('img')
    for image in images:
        url = image.get('src', '')
        text = image.get('alt', '')
        if url == '' or re.match(abs_url_re, url):
            image.replaceWith(format_url_replacement(url, text))

    links = soup.find_all('a')
    for link in links:
        url = link.get('href', '')
        text = ''.join(link.text) or ''
        if text == '':  # this is due to an issue with url inlining in comments
            link.replaceWith('')
        elif url == '' or re.match(abs_url_re, url):
            link.replaceWith(format_url_replacement(url, text))

    return force_text(soup.find('body').renderContents(), 'utf-8')
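
# A minimal usage sketch for replace_links_with_text, assuming stand-in
# implementations for the helpers it relies on: format_url_replacement and
# force_text are not defined in this snippet, so both are stubbed here.
import re
from bs4 import BeautifulSoup

def format_url_replacement(url, text):
    # hypothetical helper: render "text (url)" or whichever part is present
    return '%s (%s)' % (text, url) if text and url else (text or url)

def force_text(value, encoding):
    # hypothetical stand-in for django.utils.encoding.force_text
    return value.decode(encoding) if isinstance(value, bytes) else value

html = '<p>See <a href="https://example.com">the docs</a> and <img src="/local.png" alt="logo"></p>'
print(replace_links_with_text(html))
# the relative <img> src is left alone; the absolute <a> is flattened to
# plain text like "the docs (https://example.com)"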
def process_line_exceptions(line, extra_tags):
    global except_base_tag

    if not ' ' in line or re.match('.*[а-яіїєґ]/.*', line):
        return line
    if re.match('^[^ ]+ [^ ]+ [^:]?[a-z].*$', line):
        return line
    if line.startswith('# !'):
        except_base_tag = re.findall('![a-z:-]+', line)[0][1:] + ':'
        return ''

    base = re.findall('^[^ ]+', line)[0]

    except_base_tag2 = except_base_tag
    if base.endswith('ся'):
        except_base_tag2 = except_base_tag.replace('verb:', 'verb:rev:')

    out_line = re.sub('([^ ]+) ?', '\\1 ' + base + ' ' + except_base_tag2 + 'unknown' + extra_tags + '\n', line)

    if except_base_tag in ('verb:imperf:', 'verb:perf:'):
        base_add = 'inf:'
        # if base.endswith('ся'):
        #     base_add = 'rev:' + base_add
        out_line = re.sub("(verb:(?:rev:)?)((im)?perf:)", "\\1inf:\\2", out_line, 1)
        out_lines = out_line.split('\n')
        out_lines[0] = out_lines[0].replace(':unknown', '')
        out_line = '\n'.join(out_lines)

    return out_line[:-1]
def create_filetree(path=None, depth=0, max_depth=0):
    tree = None
    if max_depth == 0 or depth < max_depth:
        if path is None:
            path = os.getcwd()
        tree = dict(name=os.path.basename(path), children=[])
        try:
            lst = os.listdir(path)
        except OSError:
            pass  # ignore errors
        else:
            for name in lst:
                fn = os.path.join(path, name)
                if (os.path.isdir(fn) and
                        re.match('^.*(Compiled)$', fn) is None):
                    child = create_filetree(fn, depth + 1, max_depth)
                    if child is not None:
                        tree['children'].append(child)
                elif re.match('^.*\.(m|def|txt|csv)$', fn) is not None:
                    tree['children'].append(dict(name=fn.replace(
                        os.getcwd() + os.path.sep, "")))
    return tree
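
# A minimal usage sketch for create_filetree, assuming os and re are already
# imported at module level and that the walk should start from the current
# working directory.
import json

tree = create_filetree(max_depth=3)   # descend at most three levels
print(json.dumps(tree, indent=2))     # e.g. {"name": "repo", "children": [...]}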
def _sanitize(self, badKey, badVal):
    valid = True  # Used for debugging
    if 'csv_line' not in self:
        self['csv_line'] = "-1"

    # Catch bad formatting
    if badKey in self:
        logging.debug(badKey, ''.join(self[badKey]))
        logging.debug("Bad Key")
        valid = False

    if 'last_pymnt_d' in self and re.match("^\s*$", self['last_pymnt_d']):
        if 'issue_d' in self:
            # If no payment received, last payment date = issue date
            self['last_pymnt_d'] = self['issue_d']

    for k, v in self.items():
        if badVal == v:
            logging.debug(badVal)
            valid = False
            break
        # Replace empties with 0s
        if re.match('^\s*$', str(v)):
            self[k] = 0

    if not valid:
        logging.debug(self.items())
        # Can't safely access specific keys, other than id, when incorrectly formatted
        logging.warning("Fix Loan {}".format(self['id']))
        logging.warning("Line {}".format(self['csv_line']))

    return valid
def process_line(line, extra_tags):
    line = re.sub(' *#.*$', '', line)  # remove comments
    line = re.sub('-$', '', line)

    if not ' ' in line or re.match('.*[а-яіїєґ]/.*', line):
        out_line = line
    elif re.match('^[^ ]+ [^ ]+ [^:]?[a-z].*$', line):
        out_line = line
    elif re.match('^[^ ]+ [:^<a-z0-9_].*$', line):
        out_line = re.sub('^([^ ]+) ([^<a-z].*)$', '\\1 \\1 \\2', line)
    else:
        print('hit-', line, file=sys.stderr)
        base = re.findall('^[^ ]+', line)[0]
        out_line = re.sub('([^ ]+) ?', '\\1 ' + base + ' unknown' + extra_tags + '\n', line)
        return out_line[:-1]

    # if extra_tags != '' and not re.match('.* [a-z].*$', out_line):
    if extra_tags != '' and (not ' ' in out_line or ' ^' in out_line):
        extra_tags = ' ' + extra_tags

    if '|' in out_line:
        out_line = out_line.replace('|', extra_tags + '|')

    # if not "/" in out_line and not re.match("^[^ ]+ [^ ]+ [^ ]+$", out_line + extra_tags):
    #     print("bad line:", out_line + extra_tags, file=sys.stderr)
    # if len(out_line) > 100:
    #     print(out_line, file=sys.stderr)
    #     sys.exit(1)

    return out_line + extra_tags
def __init__(self, host, debugfunc=None):
    if isinstance(host, types.TupleType):
        host, self.weight = host
    else:
        self.weight = 1

    # parse the connection string
    m = re.match(r'^(?P<proto>unix):(?P<path>.*)$', host)
    if not m:
        m = re.match(r'^(?P<proto>inet):'
                     r'(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host)
    if not m:
        m = re.match(r'^(?P<host>[^:]+):(?P<port>[0-9]+)$', host)
    if not m:
        raise ValueError('Unable to parse connection string: "%s"' % host)

    hostData = m.groupdict()
    if hostData.get('proto') == 'unix':
        self.family = socket.AF_UNIX
        self.address = hostData['path']
    else:
        self.family = socket.AF_INET
        self.ip = hostData['host']
        # 'port' is present but None when the "inet:host" form omits it,
        # so fall back to the default explicitly instead of via .get()
        self.port = int(hostData.get('port') or 11211)
        self.address = (self.ip, self.port)

    if not debugfunc:
        debugfunc = lambda x: x
    self.debuglog = debugfunc

    self.deaduntil = 0
    self.socket = None
    self.buffer = ''
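
# A quick demonstration of just the connection-string regexes used above,
# independent of the surrounding class (the host values are made up).
import re

for host in ('unix:/var/run/memcached.sock', 'inet:cache.local:11212', 'cache.local'):
    m = re.match(r'^(?P<proto>unix):(?P<path>.*)$', host)
    if not m:
        m = re.match(r'^(?P<proto>inet):(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host)
    if not m:
        m = re.match(r'^(?P<host>[^:]+):(?P<port>[0-9]+)$', host)
    print(host, '->', m.groupdict() if m else None)
# unix:/var/run/memcached.sock -> {'proto': 'unix', 'path': '/var/run/memcached.sock'}
# inet:cache.local:11212 -> {'proto': 'inet', 'host': 'cache.local', 'port': '11212'}
# cache.local -> None  (a bare host with no port matches none of the patterns)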
def register(request):
    '''
    Handle a Post request with the following information:
    login, password, email
    '''
    print 'receiving a request'

    # parameter retrieval
    try:
        login = request.GET['registerLogin']
        password = request.GET['registerPassword']
        email = request.GET['registerEmail']
    except MultiValueDictKeyError:
        response = HttpResponse('400 - BAD URI')
        response.status_code = 400
        return response

    # parameter validation
    loginIsValid = re.match('[\w0-9]*', login) and len(login) > 3 and len(login) < 16
    passwordIsValid = len(password) >= 6  # TODO check with number
    emailIsValid = re.match('[\w.]*@\w*\.[\w.]*', email)

    logger.info(login + ' ' + password + ' ' + email)

    if loginIsValid and passwordIsValid and emailIsValid:
        return processFormInformation(login, password, email, request)
    else:
        response = HttpResponse("400")
        response['message'] = 'invalid information'
        response.status_code = 400
        return response
def _apache_index(self, url):
    r = requests.get(url)
    if r.status_code != 200:
        raise ValueError(url + " status:" + str(r.status_code))
    r.dirs = []
    r.files = []
    for l in r.content.split("\n"):
        # '<img src="/icons/folder.png" alt="[DIR]" /> <a href="7.0/">7.0/</a>  03-Dec-2014 19:57  -  '
        # '<img src="/icons/tgz.png" alt="[ ]" /> <a href="owncloud_7.0.4-2.diff.gz">owncloud_7.0.4-2.diff.gz</a>  09-Dec-2014 16:53  9.7K  <a href="owncloud_7.0.4-2.diff.gz.mirrorlist">Details</a>'
        #
        m = re.search("<a\s+href=[\"']?([^>]+?)[\"']?>([^<]+?)[\"']?</a>\s*([^<]*)", l, re.I)
        if m:
            # ('owncloud_7.0.4-2.diff.gz', 'owncloud_7.0.4-2.diff.gz', '09-Dec-2014 16:53  9.7K ')
            m1, m2, m3 = m.groups()
            if re.match("(/|\?|\w+://)", m1):
                # skip absolute urls, query strings and foreign urls
                continue
            if re.match("\.?\./?$", m1):
                # skip . and ..
                continue
            m3 = re.sub("[\s-]+$", "", m3)
            if re.search("/$", m1):
                r.dirs.append([m1, m3])
            else:
                r.files.append([m1, m3])
    return r
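
# A standalone check of the anchor-matching regex above, run against the
# sample directory-index line quoted in the comments.
import re

line = ('<img src="/icons/tgz.png" alt="[   ]" /> '
        '<a href="owncloud_7.0.4-2.diff.gz">owncloud_7.0.4-2.diff.gz</a> '
        '09-Dec-2014 16:53  9.7K ')
m = re.search("<a\\s+href=[\"']?([^>]+?)[\"']?>([^<]+?)[\"']?</a>\\s*([^<]*)", line, re.I)
print(m.groups())
# ('owncloud_7.0.4-2.diff.gz', 'owncloud_7.0.4-2.diff.gz', '09-Dec-2014 16:53  9.7K ')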
def processAux(self, dFrag):
    self.depth = self.depth + 1
    if not self.files.has_key(self.depth):
        self.files[self.depth] = []
    thisDir = self.compoundDir(self.topDir, dFrag)
    os.chdir(thisDir)
    self.theDict[thisDir] = {'xml': [], 'bin': [], 'dir': []}
    # print "Processing",thisDir," Depth",self.depth
    thisDirContents = os.listdir(thisDir)
    for fname in thisDirContents:
        if stat.S_ISDIR(os.stat(fname)[stat.ST_MODE]):
            if not re.match("^(CVS|images|search|photos|htdig|\.)", fname) and self.depth < 4:
                self.processAux(self.compoundDir(dFrag, fname))
                self.handleDir(thisDir, fname)
                os.chdir(thisDir)
        else:
            # print "File",fname
            if re.match(".*\.xml$", fname):
                self.handleXML(thisDir, dFrag, fname)
            elif re.match(".*\.(jpe?g|JPG|gif|png|html)$", fname):
                self.handleBinary(thisDir, fname)
    self.writeIndex(dFrag)
    self.depth = self.depth - 1
def __init__(self, filename):
    self.name = "YNAB"
    self.transactions = []
    with open(filename) as register:
        dr = csv.DictReader(register)
        for row in dr:
            trans = self._process_row(row)
            while True:
                # Merge split transactions into a single transaction
                regex = r'\(Split ([0-9]+)/([0-9]+)\)'
                match = re.match(regex, row["Memo"])
                if not match:
                    break
                for split_row in dr:
                    match = re.match(regex, split_row["Memo"])
                    t = self._process_row(split_row)
                    trans.amount += t.amount
                    current_split = match.group(1)
                    max_splits = match.group(2)
                    if current_split == max_splits:
                        break
                break
            # This fixes errors from adding numbers that can't be represented in
            # binary and expecting them to equal one that can that came from Mint.
            trans.amount = round(trans.amount, 2)
            self.transactions.append(trans)
    self.transactions.sort()
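
# A quick check of the split-transaction memo pattern used above, with
# made-up memo strings.
import re

regex = r'\(Split ([0-9]+)/([0-9]+)\)'
for memo in ('(Split 1/3) Groceries', 'Rent'):
    m = re.match(regex, memo)
    print(memo, '->', m.groups() if m else None)
# (Split 1/3) Groceries -> ('1', '3')
# Rent -> None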
def main():
    f = open('4_dataset.txt', 'r')
    x = f.readlines()
    for line in x:
        if re.match('a={(.*)}', line):
            a = re.match('a={(.*)}', line).group(1).split(',')
        elif re.match('b={(.*)}', line):
            b = re.match('b={(.*)}', line).group(1).split(',')

    f00 = f01 = f10 = f11 = 0
    print 'a =', [int(i) for i in a]
    print 'b =', [int(i) for i in b]

    for i in zip(a, b):
        if i == ('0', '0'):
            f00 += 1
        if i == ('0', '1'):
            f01 += 1
        if i == ('1', '0'):
            f10 += 1
        if i == ('1', '1'):
            f11 += 1

    print 'Similarity Coeff =', float(f00 + f11) / (f00 + f01 + f10 + f11)
    print 'Jaccard Coeff =', f11 / float(f01 + f10 + f11)
def checkInCNAME(node_text, nodes):
    try:
        InCNAME = re.search("IN CNAME (.*)", node_text)
        alias = InCNAME.group(0).split("IN CNAME ")[1]
        # IP address found
        if re.match("(\d{1,3}\.)", alias):
            return alias
        # cname is a subdomain
        elif re.match(".*[a-x]\.", alias):
            return ("subdomain found (" + alias + ")")
        # cname is another cname
        else:
            try:
                alias_name = dns.name.Name([alias])
                alias_IP = nodes[alias_name].to_text(alias_name)
                checkCname = checkInA(alias_IP)
                if checkCname is None:
                    return checkInCNAME(alias_IP, nodes)
                else:
                    return checkCname
            except:
                return (Fore.RED + "unknown host (" + alias + ")" + Fore.RESET)
    # node has no IN CNAME
    except:
        return None
def process_isolation_file(self, sql_file, output_file):
    """
    Processes the given sql file and writes the output
    to output file
    """
    try:
        command = ""
        for line in sql_file:
            tinctest.logger.info("re.match: %s" % re.match(r"^\d+[q\\<]:$", line))
            print >> output_file, line.strip(),
            (command_part, dummy, comment) = line.partition("--")
            if command_part == "" or command_part == "\n":
                print >> output_file
            elif command_part.endswith(";\n") or re.match(r"^\d+[q\\<]:$", line):
                command += command_part
                tinctest.logger.info("Processing command: %s" % command)
                self.process_command(command, output_file)
                command = ""
            else:
                command += command_part

        for process in self.processes.values():
            process.stop()
    except:
        for process in self.processes.values():
            process.terminate()
        raise
    finally:
        for process in self.processes.values():
            process.terminate()
def area_code_lookup(request, area_id, format):
    from mapit.models import Area, CodeType
    area_code = None
    if re.match('\d\d([A-Z]{2}|[A-Z]{4}|[A-Z]{2}\d\d\d|[A-Z]|[A-Z]\d\d)$', area_id):
        area_code = CodeType.objects.get(code='ons')
    elif re.match('[EW]0[12]\d{6}$', area_id):
        # LSOA/MSOA have ONS code type
        area_code = CodeType.objects.get(code='ons')
    elif re.match('[ENSW]\d{8}$', area_id):
        area_code = CodeType.objects.get(code='gss')
    if not area_code:
        return None

    args = {
        'format': format,
        'codes__type': area_code,
        'codes__code': area_id,
    }
    if re.match('[EW]01', area_id):
        args['type__code'] = 'OLF'
    elif re.match('[EW]02', area_id):
        args['type__code'] = 'OMF'

    area = get_object_or_404(Area, **args)
    path = '/area/%d%s' % (area.id, '.%s' % format if format else '')
    # If there was a query string, make sure it's passed on in the
    # redirect:
    if request.META['QUERY_STRING']:
        path += "?" + request.META['QUERY_STRING']
    return HttpResponseRedirect(path)
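
# A standalone illustration of the area_id patterns above; the identifiers
# themselves are made up for the demo.
import re

samples = ['00AB', 'E01000123', 'E02000123', 'E05000123', 'X9999']
for area_id in samples:
    if re.match('\\d\\d([A-Z]{2}|[A-Z]{4}|[A-Z]{2}\\d\\d\\d|[A-Z]|[A-Z]\\d\\d)$', area_id):
        kind = 'old-style ONS code'
    elif re.match('[EW]0[12]\\d{6}$', area_id):
        kind = 'LSOA/MSOA ONS code'
    elif re.match('[ENSW]\\d{8}$', area_id):
        kind = 'GSS code'
    else:
        kind = 'no match'
    print(area_id, '->', kind)
# 00AB -> old-style ONS code
# E01000123 -> LSOA/MSOA ONS code
# E02000123 -> LSOA/MSOA ONS code
# E05000123 -> GSS code
# X9999 -> no match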
def __load_book_menu(self, lines):
    r1 = re.compile(u'^\s*目\s*录\s*$')
    r2 = re.compile(u'^\s*([^·…]+)\s*[·.…]{2,}\s*([l\d]+)\s*$')
    menus = {}
    start = False
    not_match = 0
    for line in lines:
        words = line.decode(self.default_coding)
        words = words.strip('\n')
        if re.match(r1, words):
            start = True
            continue
        elif start:
            m = re.match(r2, words)
            if m:
                title = m.group(1)
                page = m.group(2)
                page = page.replace('l', '1')
                page = int(page.encode(self.default_coding))
                menus[page] = self.__get_simple_string(title)
                not_match = 0
            else:
                not_match += 1
                if not_match > 10:
                    break
    return menus
def filter_services(svcs):
    filtered = []
    # filter includes
    if _args['--has']:
        for sv in svcs:
            for inc in _args['--has']:
                if inc in sv["tags"] and sv not in filtered:
                    filtered.append(sv)
    if _args['--match']:
        for sv in svcs:
            for regex in _args['--match']:
                for tag in sv["tags"]:
                    if re.match(regex, tag) and sv not in filtered:
                        filtered.append(sv)
    if not filtered and not _args['--has'] and not _args['--match']:
        filtered = svcs
    if _args['--has-not']:
        for sv in list(filtered):  # operate on a copy, otherwise .remove would change the list under our feet
            for exc in _args['--has-not']:
                if exc in sv["tags"]:
                    filtered.remove(sv)
    if _args['--no-match']:
        for sv in list(filtered):
            for regex in _args['--no-match']:
                for tag in sv["tags"]:
                    if re.match(regex, tag) and sv in list(filtered):
                        filtered.remove(sv)
    return filtered
def _get_type_of_macro(self, macros, clss):
    for macro in macros:
        # ARGN Macros
        if re.match('ARG\d', macro):
            macros[macro]['type'] = 'ARGN'
            continue
        # USERN macros
        # are managed in the Config class, so no
        # need to look that here
        elif re.match('_HOST\w', macro):
            macros[macro]['type'] = 'CUSTOM'
            macros[macro]['class'] = 'HOST'
            continue
        elif re.match('_SERVICE\w', macro):
            macros[macro]['type'] = 'CUSTOM'
            macros[macro]['class'] = 'SERVICE'
            # value of macro: re.split('_HOST', '_HOSTMAC_ADDRESS')[1]
            continue
        elif re.match('_CONTACT\w', macro):
            macros[macro]['type'] = 'CUSTOM'
            macros[macro]['class'] = 'CONTACT'
            continue
        # On demand macro
        elif len(macro.split(':')) > 1:
            macros[macro]['type'] = 'ONDEMAND'
            continue
        # OK, classical macro...
        for cls in clss:
            if macro in cls.macros:
                macros[macro]['type'] = 'class'
                macros[macro]['class'] = cls
                continue
def _strip_and_unquote(keys, value):
    if value[:3] == "'''":
        m = re.match(_MULTI_LINE_SINGLE, value)
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError("string", keys, value)
    elif value[:3] == '"""':
        m = re.match(_MULTI_LINE_DOUBLE, value)
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError("string", keys, value)
    elif value[0] == '"':
        m = re.match(_DQ_VALUE, value)
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError("string", keys, value)
    elif value[0] == "'":
        m = re.match(_SQ_VALUE, value)
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError("string", keys, value)
    else:
        # unquoted
        value = re.sub('\s*#.*$', '', value)

    # Note strip() removes leading and trailing whitespace, including
    # initial newlines on a multiline string:
    return value.strip()
def tourAllFiles(dirpath):
    global a
    global alen
    global domain
    global person
    # names = list of files in current path
    names = os.listdir(dirpath)
    # find 'si' and 'sx' prefix and 'phn' suffix
    # filter out 'sa' prefix
    pat1 = '.*si.*\.phn'
    pat2 = '.*sx.*\.phn'
    drpat = 'dr\d'
    for name in names:
        if re.match(pat1, name) != None or re.match(pat2, name) != None:
            phn2label(name)
        curpath = dirpath + '/' + name
        if os.path.isdir(curpath):
            # only use to drx/person/xxx.phn
            if re.match(drpat, name):
                domain = name
            else:
                person = name
            # iterate
            os.chdir(curpath)
            tourAllFiles(curpath)
            os.chdir(dirpath)
def parse(self, response):
    sel = Selector(response)
    result = []
    ad = DatesItem()
    ad['name'] = ""
    for p in sel.xpath("//div[@class='poziomd']//text()").extract():
        if re.match("^.*,", p):
            if p.startswith(","):
                ad['desc'] = p[2:]
            else:
                ad['desc'] = p[6:]
            ad['name'] = ad['name'].lstrip('1234567890() ').strip()
            if re.match('^.\s', ad['name']):
                ad['name'] = ad['name'][2:]
            ad['url'] = response.url
            if re.match(".*urodzeni.*", response.url):
                ad['isBirth'] = True
            else:
                ad['isBirth'] = False
            result.append(ad)
            ad = DatesItem()
            ad['name'] = ""
        elif re.match("^\s*[0-9]{1,4}", p) and not ad.has_key('date'):
            ad['date'] = re.match("^\s*[0-9]{1,4}", p).group()
        else:
            ad['name'] = ad['name'] + p
    return result
def main():
    f = open("makefile2wrappers.txt", "r")
    lins = f.readlines()
    f.close()
    for l in lins:
        l = l.strip()
        if len(l) == 0:
            continue
        print('Line: ' + l)
        # $(C) -DDINT -c ../Source/umf_analyze.c -o umf_i_analyze.o
        defs = re.match(".*\)(.*)-c", l).group(1).strip()
        # If there's no "-o" flag, just compile the file as is:
        if re.search('.*-o.*', l) != None:
            src = re.match(".*-c(.*)-o", l).group(1).strip()
            out = re.match(".*-o(.*)", l).group(1).strip()
            f = 'SourceWrappers/' + out + ".c"
            print(' => Creating ' + f + '\n')
            o = open(f, "w")
            DEFs = defs.strip().split("-D")
            DEFs = [x for x in DEFs if x]  # Remove empty
            for d in DEFs:
                o.write('#define ' + d + '\n')
            o.write('#include <' + src + '>' + '\n')
            o.close()
        else:
            src = re.match(".*-c(.*)", l).group(1).strip()
            f = "SourceWrappers/" + os.path.basename(src)
            print(' => Creating ' + f + '\n')
            o = open(f, "w")
            o.write('#include <' + src + '>' + '\n')
            o.close()
    return 0
def test_various_ops(self):
    # This takes about n/3 seconds to run (about n/3 clumps of tasks,
    # times about 1 second per clump).
    NUMTASKS = 10

    # no more than 3 of the 10 can run at once
    sema = threading.BoundedSemaphore(value=3)
    mutex = threading.RLock()
    numrunning = Counter()

    threads = []

    for i in range(NUMTASKS):
        t = TestThread("<thread %d>" % i, self, sema, mutex, numrunning)
        threads.append(t)
        self.assertEqual(t.ident, None)
        self.assertTrue(re.match("<TestThread\(.*, initial\)>", repr(t)))
        t.start()

    if verbose:
        print("waiting for all tasks to complete")
    for t in threads:
        t.join(NUMTASKS)
        self.assertTrue(not t.is_alive())
        self.assertNotEqual(t.ident, 0)
        self.assertFalse(t.ident is None)
        self.assertTrue(re.match("<TestThread\(.*, stopped -?\d+\)>", repr(t)))
    if verbose:
        print("all tasks done")
    self.assertEqual(numrunning.get(), 0)
def parse_requirements(requirements_file='requirements.txt'):
    requirements = []
    with open(requirements_file, 'r') as f:
        for line in f:
            # For the requirements list, we need to inject only the portion
            # after egg= so that distutils knows the package it's looking for
            # such as:
            # -e git://github.com/openstack/nova/master#egg=nova
            if re.match(r'\s*-e\s+', line):
                requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$', r'\1', line))
            # such as:
            # http://github.com/openstack/nova/zipball/master#egg=nova
            elif re.match(r'\s*https?:', line):
                requirements.append(re.sub(r'\s*https?:.*#egg=(.*)$', r'\1', line))
            # -f lines are for index locations, and don't get used here
            elif re.match(r'\s*-f\s+', line):
                pass
            # -r lines are for including other files, and don't get used here
            elif re.match(r'\s*-r\s+', line):
                pass
            # argparse is part of the standard library starting with 2.7
            # adding it to the requirements list screws distro installs
            elif line == 'argparse' and sys.version_info >= (2, 7):
                pass
            else:
                requirements.append(line.strip())
    return requirements
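
# A quick check of the two #egg= substitutions above, applied to the example
# lines quoted in the comments.
import re

line1 = '-e git://github.com/openstack/nova/master#egg=nova\n'
line2 = 'http://github.com/openstack/nova/zipball/master#egg=nova\n'
print(re.sub(r'\s*-e\s+.*#egg=(.*)$', r'\1', line1))
print(re.sub(r'\s*https?:.*#egg=(.*)$', r'\1', line2))
# both print "nova" (the trailing newline survives, just as it does in the
# original code, which appends the substituted line unstripped)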
def parse_template(template_name):
    """Given a template name, attempt to extract its group name and upload date

    Returns:
        * None if no groups matched
        * group_name, datestamp of the first matching group. group name will be
          a string, datestamp will be a :py:class:`datetime.date <python:datetime.date>`,
          or None if a date can't be derived from the template name
    """
    for group_name, regex in stream_matchers:
        matches = re.match(regex, template_name)
        if matches:
            groups = matches.groupdict()
            # hilarity may ensue if this code is run right before the new year
            today = date.today()
            year = int(groups.get('year', today.year))
            month, day = int(groups['month']), int(groups['day'])
            # validate the template date by turning into a date obj
            template_date = futurecheck(date(year, month, day))
            return TemplateInfo(group_name, template_date, True)
    for group_name, regex in generic_matchers:
        matches = re.match(regex, template_name)
        if matches:
            return TemplateInfo(group_name, None, False)
    # If no match, unknown
    return TemplateInfo('unknown', None, False)
def importAuto(cls, string, path=None, activeFit=None, callback=None, encoding=None):
    # Get first line and strip space symbols of it to avoid possible detection errors
    firstLine = re.split("[\n\r]+", string.strip(), maxsplit=1)[0]
    firstLine = firstLine.strip()

    # If XML-style start of tag encountered, detect as XML
    if re.match("<", firstLine):
        if encoding:
            return "XML", cls.importXml(string, callback, encoding)
        else:
            return "XML", cls.importXml(string, callback)

    # If JSON-style start, parse as CREST/JSON
    if firstLine[0] == '{':
        return "JSON", (cls.importCrest(string),)

    # If we've got source file name which is used to describe ship name
    # and first line contains something like [setup name], detect as eft config file
    if re.match("\[.*\]", firstLine) and path is not None:
        filename = os.path.split(path)[1]
        shipName = filename.rsplit('.')[0]
        return "EFT Config", cls.importEftCfg(shipName, string, callback)

    # If no file is specified and there's comma between brackets,
    # consider that we have [ship, setup name] and detect like eft export format
    if re.match("\[.*,.*\]", firstLine):
        return "EFT", (cls.importEft(string),)

    # Use DNA format for all other cases
    return "DNA", (cls.importDna(string),)
def parse(fh):
    stats = []
    for line in fh:
        m = re.match(r'TRANSLATION\s+(?P<content>.*)\n', line)
        if not m:
            continue
        line = m.group('content')

        m = re.match(r'(?P<group>[a-zA-Z_@.]+):', line)
        if not m:
            sys.stderr.write('Malformed TRANSLATION line: %s\n' % line)
            continue
        stat = {'group': m.group('group')}

        if stat['group'] == 'total':
            continue
        else:
            sum = 0
            for x in stat_types:
                m = re.search(r'\b(?P<count>\d+) %s (message|translation)' % x, line)
                if m:
                    stat[x] = int(m.group('count'))
                    sum += stat[x]
            stat['total'] = sum

        stats.append(stat)
    return stats
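
# A small parsing sketch for a single TRANSLATION line, assuming a
# hypothetical stat_types list (the real list is defined elsewhere in the
# module this snippet comes from) and a made-up input line.
import re

stat_types = ['translated', 'fuzzy', 'untranslated']  # assumed values
line = 'TRANSLATION  de.po: 120 translated messages, 4 fuzzy translations, 2 untranslated messages.\n'
content = re.match(r'TRANSLATION\s+(?P<content>.*)\n', line).group('content')
group = re.match(r'(?P<group>[a-zA-Z_@.]+):', content).group('group')
counts = {}
for x in stat_types:
    m = re.search(r'\b(?P<count>\d+) %s (message|translation)' % x, content)
    if m:
        counts[x] = int(m.group('count'))
print(group, counts)
# de.po {'translated': 120, 'fuzzy': 4, 'untranslated': 2}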
def check_api_version_decorator(logical_line, previous_logical, blank_before, filename):
    msg = ("N332: the api_version decorator must be the first decorator"
           " on a method.")
    if blank_before == 0 and re.match(api_version_re, logical_line) \
            and re.match(decorator_re, previous_logical):
        yield(0, msg)
def history(self, page):
    GIT_COMMIT_FIELDS = ["commit", "author", "date", "date_relative", "message"]
    GIT_LOG_FORMAT = "%x1f".join(["%h", "%an", "%ad", "%ar", "%s"]) + "%x1e"

    output = git.log("--format=%s" % GIT_LOG_FORMAT, "--follow", "-z", "--shortstat", page.abspath)
    output = output.split("\n")

    history = []
    for line in output:
        if "\x1f" in line:
            log = line.strip("\x1e\x00").split("\x1f")
            history.append(dict(zip(GIT_COMMIT_FIELDS, log)))
        else:
            insertion = re.match(r".* (\d+) insertion", line)
            deletion = re.match(r".* (\d+) deletion", line)
            history[-1]["insertion"] = int(insertion.group(1)) if insertion else 0
            history[-1]["deletion"] = int(deletion.group(1)) if deletion else 0

    max_changes = float(max([(v["insertion"] + v["deletion"]) for v in history])) or 1.0
    for v in history:
        v.update(
            {
                "insertion_relative": str((v["insertion"] / max_changes) * 100),
                "deletion_relative": str((v["deletion"] / max_changes) * 100),
            }
        )
    return history
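
# A standalone sketch of how the delimited log record is split, using a
# made-up record instead of real `git log` output (%x1f separates fields,
# %x1e terminates a record).
GIT_COMMIT_FIELDS = ["commit", "author", "date", "date_relative", "message"]
line = "abc1234\x1fAda Lovelace\x1fMon Jan 1 12:00:00 2024\x1f3 days ago\x1fFix typo\x1e"
log = line.strip("\x1e\x00").split("\x1f")
print(dict(zip(GIT_COMMIT_FIELDS, log)))
# {'commit': 'abc1234', 'author': 'Ada Lovelace', 'date': 'Mon Jan 1 12:00:00 2024',
#  'date_relative': '3 days ago', 'message': 'Fix typo'}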
def readFile(fileV4, fileV6, trie):
    # open ipv4 file
    input = open(fileV4, "r")
    pattern = '(\d+)\,(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/(\d{1,2}).*'
    for line in input:
        result = re.match(pattern, line)
        if result:
            address = result.group(2)
            length = result.group(3)
            asn = result.group(1)
            update = True
            withdrawal = False
            count = 0
            insertTrie(trie, address, length, asn, update, withdrawal, count)

    # open ipv6 file
    input = open(fileV6, "r")
    pattern = '(\d+)\,(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))/(\d{1,3}),.*'
    for line in input:
        result = re.match(pattern, line)
        if result:
            address = result.group(2)
            length = result.group(32)
            asn = result.group(1)
            update = True
            withdrawal = False
            count = 0
            insertTrie(trie, address, length, asn, update, withdrawal, count)
    return trie
def parse_report(path):
    """ Return the volume information contained in the SIENAX report. This
        is a dictionary with keys "grey", "white", and "brain". The
        information for the different tissues is a dictionary with the
        normalized and raw values, in cubic millimeters.

        adapted from: http://code.google.com/p/medipy/source/browse/plugins/fsl/sienax.py
        see licence: http://code.google.com/p/medipy/source/browse/LICENSE
    """
    report = {}
    fd = open(path)
    for line in fd.readlines():
        for tissue in ["GREY", "WHITE", "BRAIN"]:
            pattern = tissue + r"\s+([\d+\.]+)\s+([\d+\.]+)"
            measure = re.match(pattern, line)
            if measure:
                normalized = float(measure.group(1))
                raw = float(measure.group(2))
                report[tissue.lower()] = {"normalized": normalized, "raw": raw}
                continue
        vscale = re.match("VSCALING ([\d\.]+)", line)
        if vscale:
            report["vscale"] = float(vscale.group(1))
    return report
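
# A small check of the report patterns above on two made-up SIENAX-style lines.
import re

for line in ("GREY  712030.45  615420.10", "VSCALING 1.2345"):
    m = re.match(r"GREY\s+([\d+\.]+)\s+([\d+\.]+)", line)
    if m:
        print("grey:", {"normalized": float(m.group(1)), "raw": float(m.group(2))})
    v = re.match(r"VSCALING ([\d\.]+)", line)
    if v:
        print("vscale:", float(v.group(1)))
# grey: {'normalized': 712030.45, 'raw': 615420.1}
# vscale: 1.2345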
def LoadUniFile(self, File = None): if File == None: EdkLogger.error("Unicode File Parser", PARSER_ERROR, 'No unicode file is given') self.File = File # # Process special char in file # Lines = self.PreProcess(File) # # Get Unicode Information # for IndexI in range(len(Lines)): Line = Lines[IndexI] if (IndexI + 1) < len(Lines): SecondLine = Lines[IndexI + 1] if (IndexI + 2) < len(Lines): ThirdLine = Lines[IndexI + 2] # # Get Language def information # if Line.find(u'#langdef ') >= 0: self.GetLangDef(File, Line) continue Name = '' Language = '' Value = '' # # Get string def information format 1 as below # # #string MY_STRING_1 # #language eng # My first English string line 1 # My first English string line 2 # #string MY_STRING_1 # #language spa # Mi segunda secuencia 1 # Mi segunda secuencia 2 # if Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and \ SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and \ ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0: Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ') Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ') for IndexJ in range(IndexI + 2, len(Lines)): if Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') < 0: Value = Value + Lines[IndexJ] else: IndexI = IndexJ break # Value = Value.replace(u'\r\n', u'') Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File) # Check the string name is the upper character if not self.IsCompatibleMode and Name != '': MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE) if MatchString == None or MatchString.end(0) != len(Name): EdkLogger.error('Unicode File Parser', FORMAT_INVALID, 'The string token name %s defined in UNI file %s contains the invalid lower case character.' %(Name, self.File)) self.AddStringToList(Name, Language, Value) continue # # Get string def information format 2 as below # # #string MY_STRING_1 #language eng "My first English string line 1" # "My first English string line 2" # #language spa "Mi segunda secuencia 1" # "Mi segunda secuencia 2" # #string MY_STRING_2 #language eng "My first English string line 1" # "My first English string line 2" # #string MY_STRING_2 #language spa "Mi segunda secuencia 1" # "Mi segunda secuencia 2" # if Line.find(u'#string ') >= 0 and Line.find(u'#language ') >= 0: StringItem = Line for IndexJ in range(IndexI + 1, len(Lines)): if Lines[IndexJ].find(u'#string ') >= 0 and Lines[IndexJ].find(u'#language ') >= 0: IndexI = IndexJ break elif Lines[IndexJ].find(u'#string ') < 0 and Lines[IndexJ].find(u'#language ') >= 0: StringItem = StringItem + Lines[IndexJ] elif Lines[IndexJ].count(u'\"') >= 2: StringItem = StringItem[ : StringItem.rfind(u'\"')] + Lines[IndexJ][Lines[IndexJ].find(u'\"') + len(u'\"') : ] self.GetStringObject(StringItem) continue
def _extend(filename, n, keys=()): """ For internal use only. Extend a file. :param file: str :param n: int :param keys: tuple :return: str, set """ with open(filename, 'r') as file: header = file.readline() reader = csv.reader(file) lines = [_ for _ in reader] fname = f"{filename}_{n}.csv" with open(fname, 'w') as file: file.write(header) for line in lines: file.write(','.join(line) + '\n') # file.writelines([','.join(x) for x in lines]) # file.write('\n') if not keys: these_keys = set([line[0].strip() for line in lines]) else: these_keys = set() n = n // 5 for i in range(n): for line in lines: mod_words = line[:] if keys: # Use provided users and products uid = random.choice(keys[0]) pid = random.choice(keys[1]) counter = 0 while (uid, pid) in these_keys: uid = random.choice(keys[0]) pid = random.choice(keys[1]) if counter > 100: break if (uid, pid) in these_keys: continue file.write(f"{uid}, {pid}, {random.randint(1, int(mod_words[-1].strip()) * 2)}\n") else: mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))]) while mod_key.strip() in these_keys: mod_key = ''.join([random.choice(string.ascii_letters) for _ in range(len(mod_words[0]))]) these_keys.add(mod_key) mod_words[0] = mod_key for j, word in enumerate(line[1:], 1): # If a phone number, randomize digits if re.match(r"\d{3}-\d{3}-\d{4}", word.strip()): num = f"{random.randint(0, 9999999999):09d}" mod_words[j] = num[:3] + '-' + num[3:6] + '-' + num[-4:] # If a number, randomize elif re.fullmatch(r"\d*", word.strip()): num = random.randint(1, int(word.strip()) * 2) mod_words[j] = str(num) else: # Replace 1/2 of characters with random digits mod_locs = [random.randint(0, len(word) - 1) for _ in range(len(word) // 2)] lst = list(word) for loc in mod_locs: lst[loc] = random.choice(string.ascii_letters) mod_words[j] = ''.join(lst) file.write(','.join(mod_words) + '\n') # file.writelines([]) for line in lines]) return fname, these_keys
def _read_cells(f, line): # If the line is self-contained, it is merely a declaration of the total # number of points. if line.count('(') == line.count(')'): return None, None out = re.match('\\s*\\(\\s*(|20|30)12\\s*\\(([^\\)]+)\\).*', line) a = [int(num, 16) for num in out.group(2).split()] assert len(a) > 4 first_index = a[1] last_index = a[2] num_cells = last_index - first_index + 1 element_type = a[4] element_type_to_key_num_nodes = { 0: ('mixed', None), 1: ('triangle', 3), 2: ('tetra', 4), 3: ('quad', 4), 4: ('hexahedron', 8), 5: ('pyra', 5), 6: ('wedge', 6), } key, num_nodes_per_cell = \ element_type_to_key_num_nodes[element_type] # Skip to the opening `(` and make sure that there's no non-whitespace # character between the last closing bracket and the `(`. if line.strip()[-1] != '(': c = None while True: c = f.read(1).decode('utf-8') if c == '(': break if not re.match('\\s', c): # Found a non-whitespace character before `(`. # Assume this is just a declaration line then and # skip to the closing bracket. _skip_to(f, ')') return None, None assert key != 'mixed' # read cell data if out.group(1) == '': # ASCII cells data = numpy.empty((num_cells, num_nodes_per_cell), dtype=int) for k in range(num_cells): line = f.readline().decode('utf-8') dat = line.split() assert len(dat) == num_nodes_per_cell data[k] = [int(d, 16) for d in dat] else: # binary cells if out.group(1) == '20': bytes_per_item = 4 dtype = numpy.int32 else: assert out.group(1) == '30' bytes_per_item = 8 dtype = numpy.int64 total_bytes = \ bytes_per_item * num_nodes_per_cell * num_cells data = numpy.fromstring(f.read(total_bytes), count=(num_nodes_per_cell * num_cells), dtype=dtype).reshape( (num_cells, num_nodes_per_cell)) # make sure that the data set is properly closed _skip_close(f, 2) return key, data
def read(filename): # Initialize the data optional data fields field_data = {} cell_data = {} point_data = {} points = [] cells = {} first_point_index_overall = None last_point_index = None # read file in binary mode since some data might be binary with open(filename, 'rb') as f: while True: line = f.readline().decode('utf-8') if not line: break if line.strip() == '': continue # expect the line to have the form # (<index> [...] out = re.match('\\s*\\(\\s*([0-9]+).*', line) assert out index = out.group(1) if index == '0': # Comment. _skip_close(f, line.count('(') - line.count(')')) elif index == '1': # header # (1 "<text>") _skip_close(f, line.count('(') - line.count(')')) elif index == '2': # dimensionality # (2 3) _skip_close(f, line.count('(') - line.count(')')) elif re.match('(|20|30)10', index): # points pts, first_point_index_overall, last_point_index = \ _read_points( f, line, first_point_index_overall, last_point_index ) if pts is not None: points.append(pts) elif re.match('(|20|30)12', index): # cells # (2012 (zone-id first-index last-index type element-type)) key, data = _read_cells(f, line) if data is not None: cells[key] = data elif re.match('(|20|30)13', index): data = _read_faces(f, line) for key in data: if key in cells: cells[key] = numpy.concatenate([cells[key], data[key]]) else: cells[key] = data[key] elif index == '39': logging.warning( 'Zone specification not supported yet. Skipping.') _skip_close(f, line.count('(') - line.count(')')) elif index == '45': # (45 (2 fluid solid)()) obj = re.match('\\(45 \\([0-9]+ ([\\S]+) ([\\S]+)\\)\\(\\)\\)', line) if obj: logging.warning( 'Zone specification not supported yet (%r, %r). ' 'Skipping.', obj.group(1), obj.group(2)) else: logging.warning('Zone specification not supported yet.') else: logging.warning('Unknown index %r. Skipping.', index) # Skipping ahead to the next line with two closing brackets. _skip_close(f, line.count('(') - line.count(')')) points = numpy.concatenate(points) # Gauge the cells with the first point_index. for key in cells: cells[key] -= first_point_index_overall return points, cells, point_data, cell_data, field_data
def _read_faces(f, line): # faces # (13 (zone-id first-index last-index type element-type)) # If the line is self-contained, it is merely a declaration of # the total number of points. if line.count('(') == line.count(')'): return {} out = re.match('\\s*\\(\\s*(|20|30)13\\s*\\(([^\\)]+)\\).*', line) a = [int(num, 16) for num in out.group(2).split()] assert len(a) > 4 first_index = a[1] last_index = a[2] num_cells = last_index - first_index + 1 element_type = a[4] element_type_to_key_num_nodes = { 0: ('mixed', None), 2: ('line', 2), 3: ('triangle', 3), 4: ('quad', 4) } key, num_nodes_per_cell = \ element_type_to_key_num_nodes[element_type] # Skip ahead to the line that opens the data block (might be # the current line already). if line.strip()[-1] != '(': _skip_to(f, '(') data = {} if out.group(1) == '': # ASCII if key == 'mixed': # From # <http://www.afs.enea.it/fluent/Public/Fluent-Doc/PDF/chp03.pdf>: # > If the face zone is of mixed type (element-type = # > 0), the body of the section will include the face # > type and will appear as follows # > # > type v0 v1 v2 c0 c1 # > for k in range(num_cells): line = '' while line.strip() == '': line = f.readline().decode('utf-8') dat = line.split() type_index = int(dat[0], 16) assert type_index != 0 type_string, num_nodes_per_cell = \ element_type_to_key_num_nodes[type_index] assert len(dat) == num_nodes_per_cell + 3 if type_string not in data: data[type_string] = [] data[type_string].append( [int(d, 16) for d in dat[1:num_nodes_per_cell + 1]]) data = {key: numpy.array(data[key]) for key in data} else: # read cell data data = numpy.empty((num_cells, num_nodes_per_cell), dtype=int) for k in range(num_cells): line = f.readline().decode('utf-8') dat = line.split() # The body of a regular face section contains the # grid connectivity, and each line appears as # follows: # n0 n1 n2 cr cl # where n* are the defining nodes (vertices) of the # face, and c* are the adjacent cells. assert len(dat) == num_nodes_per_cell + 2 data[k] = [int(d, 16) for d in dat[:num_nodes_per_cell]] data = {key: data} else: # binary if out.group(1) == '20': bytes_per_item = 4 dtype = numpy.int32 else: assert out.group(1) == '30' bytes_per_item = 8 dtype = numpy.int64 assert key != 'mixed' # Read cell data. # The body of a regular face section contains the grid # connectivity, and each line appears as follows: # n0 n1 n2 cr cl # where n* are the defining nodes (vertices) of the face, # and c* are the adjacent cells. total_bytes = \ num_cells * bytes_per_item * (num_nodes_per_cell + 2) data = numpy.fromstring(f.read(total_bytes), dtype=dtype).reshape( (num_cells, num_nodes_per_cell + 2)) # Cut off the adjacent cell data. data = data[:, :num_nodes_per_cell] data = {key: data} # make sure that the data set is properly closed _skip_close(f, 2) return data
def HDF5_ATL11_corr_write(IS2_atl11_corr, IS2_atl11_attrs, INPUT=None, FILENAME='', FILL_VALUE=None, DIMENSIONS=None, CROSSOVERS=False, CLOBBER=False): # setting HDF5 clobber attribute if CLOBBER: clobber = 'w' else: clobber = 'w-' # open output HDF5 file fileID = h5py.File(os.path.expanduser(FILENAME), clobber) # create HDF5 records h5 = {} # number of GPS seconds between the GPS epoch (1980-01-06T00:00:00Z UTC) # and ATLAS Standard Data Product (SDP) epoch (2018-01-01T00:00:00Z UTC) h5['ancillary_data'] = {} for k,v in IS2_atl11_corr['ancillary_data'].items(): # Defining the HDF5 dataset variables val = 'ancillary_data/{0}'.format(k) h5['ancillary_data'][k] = fileID.create_dataset(val, np.shape(v), data=v, dtype=v.dtype, compression='gzip') # add HDF5 variable attributes for att_name,att_val in IS2_atl11_attrs['ancillary_data'][k].items(): h5['ancillary_data'][k].attrs[att_name] = att_val # write each output beam pair pairs = [k for k in IS2_atl11_corr.keys() if bool(re.match(r'pt\d',k))] for ptx in pairs: fileID.create_group(ptx) h5[ptx] = {} # add HDF5 group attributes for beam for att_name in ['description','beam_pair','ReferenceGroundTrack', 'first_cycle','last_cycle','equatorial_radius','polar_radius']: fileID[ptx].attrs[att_name] = IS2_atl11_attrs[ptx][att_name] # ref_pt, cycle number, geolocation and delta_time variables for k in ['ref_pt','cycle_number','delta_time','latitude','longitude']: # values and attributes v = IS2_atl11_corr[ptx][k] attrs = IS2_atl11_attrs[ptx][k] fillvalue = FILL_VALUE[ptx][k] # Defining the HDF5 dataset variables val = '{0}/{1}'.format(ptx,k) if fillvalue: h5[ptx][k] = fileID.create_dataset(val, np.shape(v), data=v, dtype=v.dtype, fillvalue=fillvalue, compression='gzip') else: h5[ptx][k] = fileID.create_dataset(val, np.shape(v), data=v, dtype=v.dtype, compression='gzip') # create or attach dimensions for HDF5 variable if DIMENSIONS[ptx][k]: # attach dimensions for i,dim in enumerate(DIMENSIONS[ptx][k]): h5[ptx][k].dims[i].attach_scale(h5[ptx][dim]) else: # make dimension h5[ptx][k].make_scale(k) # add HDF5 variable attributes for att_name,att_val in attrs.items(): h5[ptx][k].attrs[att_name] = att_val # add to cycle_stats variables groups = ['cycle_stats'] # if running crossovers: add to crossing_track_data variables if CROSSOVERS: groups.append('crossing_track_data') for key in groups: fileID[ptx].create_group(key) h5[ptx][key] = {} for att_name in ['Description','data_rate']: att_val=IS2_atl11_attrs[ptx][key][att_name] fileID[ptx][key].attrs[att_name] = att_val for k,v in IS2_atl11_corr[ptx][key].items(): # attributes attrs = IS2_atl11_attrs[ptx][key][k] fillvalue = FILL_VALUE[ptx][key][k] # Defining the HDF5 dataset variables val = '{0}/{1}/{2}'.format(ptx,key,k) if fillvalue: h5[ptx][key][k] = fileID.create_dataset(val, np.shape(v), data=v, dtype=v.dtype, fillvalue=fillvalue, compression='gzip') else: h5[ptx][key][k] = fileID.create_dataset(val, np.shape(v), data=v, dtype=v.dtype, compression='gzip') # create or attach dimensions for HDF5 variable if DIMENSIONS[ptx][key][k]: # attach dimensions for i,dim in enumerate(DIMENSIONS[ptx][key][k]): if (key == 'cycle_stats'): h5[ptx][key][k].dims[i].attach_scale(h5[ptx][dim]) else: h5[ptx][key][k].dims[i].attach_scale(h5[ptx][key][dim]) else: # make dimension h5[ptx][key][k].make_scale(k) # add HDF5 variable attributes for att_name,att_val in attrs.items(): h5[ptx][key][k].attrs[att_name] = att_val # HDF5 file title fileID.attrs['featureType'] = 'trajectory' fileID.attrs['title'] = 'ATLAS/ICESat-2 Annual 
Land Ice Height' fileID.attrs['summary'] = ('The purpose of ATL11 is to provide an ICESat-2 ' 'satellite cycle summary of heights and height changes of land-based ' 'ice and will be provided as input to ATL15 and ATL16, gridded ' 'estimates of heights and height-changes.') fileID.attrs['description'] = ('Land ice parameters for each beam pair. ' 'All parameters are calculated for the same along-track increments ' 'for each beam pair and repeat.') date_created = datetime.datetime.today() fileID.attrs['date_created'] = date_created.isoformat() project = 'ICESat-2 > Ice, Cloud, and land Elevation Satellite-2' fileID.attrs['project'] = project platform = 'ICESat-2 > Ice, Cloud, and land Elevation Satellite-2' fileID.attrs['project'] = platform # add attribute for elevation instrument and designated processing level instrument = 'ATLAS > Advanced Topographic Laser Altimeter System' fileID.attrs['instrument'] = instrument fileID.attrs['source'] = 'Spacecraft' fileID.attrs['references'] = 'https://nsidc.org/data/icesat-2' fileID.attrs['processing_level'] = '4' # add attributes for input ATL11 files fileID.attrs['input_files'] = os.path.basename(INPUT) # find geospatial and temporal ranges lnmn,lnmx,ltmn,ltmx,tmn,tmx = (np.inf,-np.inf,np.inf,-np.inf,np.inf,-np.inf) for ptx in pairs: lon = IS2_atl11_corr[ptx]['longitude'] lat = IS2_atl11_corr[ptx]['latitude'] delta_time = IS2_atl11_corr[ptx]['delta_time'] valid = np.nonzero(delta_time != FILL_VALUE[ptx]['delta_time']) # setting the geospatial and temporal ranges lnmn = lon.min() if (lon.min() < lnmn) else lnmn lnmx = lon.max() if (lon.max() > lnmx) else lnmx ltmn = lat.min() if (lat.min() < ltmn) else ltmn ltmx = lat.max() if (lat.max() > ltmx) else ltmx tmn = delta_time[valid].min() if (delta_time[valid].min() < tmn) else tmn tmx = delta_time[valid].max() if (delta_time[valid].max() > tmx) else tmx # add geospatial and temporal attributes fileID.attrs['geospatial_lat_min'] = ltmn fileID.attrs['geospatial_lat_max'] = ltmx fileID.attrs['geospatial_lon_min'] = lnmn fileID.attrs['geospatial_lon_max'] = lnmx fileID.attrs['geospatial_lat_units'] = "degrees_north" fileID.attrs['geospatial_lon_units'] = "degrees_east" fileID.attrs['geospatial_ellipsoid'] = "WGS84" fileID.attrs['date_type'] = 'UTC' fileID.attrs['time_type'] = 'CCSDS UTC-A' # convert start and end time from ATLAS SDP seconds into Julian days JD = convert_delta_time(np.array([tmn,tmx]))['julian'] # convert to calendar date YY,MM,DD,HH,MN,SS = SMBcorr.time.convert_julian(JD,FORMAT='tuple') # add attributes with measurement date start, end and duration tcs = datetime.datetime(int(YY[0]), int(MM[0]), int(DD[0]), int(HH[0]), int(MN[0]), int(SS[0]), int(1e6*(SS[0] % 1))) fileID.attrs['time_coverage_start'] = tcs.isoformat() tce = datetime.datetime(int(YY[1]), int(MM[1]), int(DD[1]), int(HH[1]), int(MN[1]), int(SS[1]), int(1e6*(SS[1] % 1))) fileID.attrs['time_coverage_end'] = tce.isoformat() fileID.attrs['time_coverage_duration'] = '{0:0.0f}'.format(tmx-tmn) # Closing the HDF5 file fileID.close()
def interp_SMB_ICESat2(base_dir, FILE, model_version, CROSSOVERS=False, GZIP=False, VERBOSE=False, MODE=0o775): # read data from input file print('{0} -->'.format(os.path.basename(FILE))) if VERBOSE else None # Open the HDF5 file for reading fileID = h5py.File(FILE, 'r') # output data directory ddir = os.path.dirname(FILE) # extract parameters from ICESat-2 ATLAS HDF5 file name rx = re.compile(r'(processed_)?(ATL\d{2})_(\d{4})(\d{2})_(\d{2})(\d{2})_' r'(\d{3})_(\d{2})(.*?).h5$') SUB,PRD,TRK,GRAN,SCYC,ECYC,RL,VERS,AUX = rx.findall(FILE).pop() # get projection and region name based on granule REGION,proj4_params = set_projection(GRAN) # determine main model group from region and model_version MODEL, = [key for key,val in models[REGION].items() if model_version in val] # keyword arguments for all models KWARGS = dict(SIGMA=1.5, FILL_VALUE=np.nan) # set model specific parameters if (MODEL == 'MAR'): match_object=re.match(r'(MARv\d+\.\d+(.\d+)?)',model_version) MAR_VERSION=match_object.group(0) MAR_REGION=dict(GL='Greenland',AA='Antarctic')[REGION] # model subdirectories SUBDIRECTORY=dict(AA={}, GL={}) SUBDIRECTORY['GL']['MARv3.9-ERA']=['ERA_1958-2018_10km','daily_10km'] SUBDIRECTORY['GL']['MARv3.10-ERA']=['ERA_1958-2019-15km','daily_15km'] SUBDIRECTORY['GL']['MARv3.11-NCEP']=['NCEP1_1948-2020_20km','daily_20km'] SUBDIRECTORY['GL']['MARv3.11-ERA']=['ERA_1958-2019-15km','daily_15km'] SUBDIRECTORY['GL']['MARv3.11.2-ERA-6km']=['6km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.2-ERA-7.5km']=['7.5km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.2-ERA-10km']=['10km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.2-ERA-15km']=['15km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.2-ERA-20km']=['20km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.2-NCEP-20km']=['20km_NCEP1'] SUBDIRECTORY['GL']['MARv3.11.5-ERA-6km']=['6km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.5-ERA-10km']=['10km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.5-ERA-15km']=['15km_ERA5'] SUBDIRECTORY['GL']['MARv3.11.5-ERA-20km']=['20km_ERA5'] MAR_MODEL=SUBDIRECTORY[REGION][model_version] DIRECTORY=os.path.join(base_dir,'MAR',MAR_VERSION,MAR_REGION,*MAR_MODEL) # keyword arguments for variable coordinates MAR_KWARGS=dict(AA={}, GL={}) MAR_KWARGS['GL']['MARv3.9-ERA'] = dict(XNAME='X10_153',YNAME='Y21_288') MAR_KWARGS['GL']['MARv3.10-ERA'] = dict(XNAME='X10_105',YNAME='Y21_199') MAR_KWARGS['GL']['MARv3.11-NCEP'] = dict(XNAME='X12_84',YNAME='Y21_155') MAR_KWARGS['GL']['MARv3.11-ERA'] = dict(XNAME='X10_105',YNAME='Y21_199') MAR_KWARGS['GL']['MARv3.11.2-ERA-6km'] = dict(XNAME='X12_251',YNAME='Y20_465') MAR_KWARGS['GL']['MARv3.11.2-ERA-7.5km'] = dict(XNAME='X12_203',YNAME='Y20_377') MAR_KWARGS['GL']['MARv3.11.2-ERA-10km'] = dict(XNAME='X10_153',YNAME='Y21_288') MAR_KWARGS['GL']['MARv3.11.2-ERA-15km'] = dict(XNAME='X10_105',YNAME='Y21_199') MAR_KWARGS['GL']['MARv3.11.2-ERA-20km'] = dict(XNAME='X12_84',YNAME='Y21_155') MAR_KWARGS['GL']['MARv3.11.2-NCEP-20km'] = dict(XNAME='X12_84',YNAME='Y21_155') MAR_KWARGS['GL']['MARv3.11.5-ERA-6km'] = dict(XNAME='X12_251',YNAME='Y20_465') MAR_KWARGS['GL']['MARv3.11.5-ERA-10km'] = dict(XNAME='X10_153',YNAME='Y21_288') MAR_KWARGS['GL']['MARv3.11.5-ERA-15km'] = dict(XNAME='X10_105',YNAME='Y21_199') MAR_KWARGS['GL']['MARv3.11.5-ERA-20km'] = dict(XNAME='X12_84',YNAME='Y21_155') KWARGS.update(MAR_KWARGS[REGION][model_version]) # netCDF4 variable names for direct fields VARIABLES = ['SMB','ZN6','ZN4','ZN5'] # output variable keys for both direct and derived fields KEYS = ['SMB','zsurf','zfirn','zmelt','zsmb','zaccum'] # HDF5 longname and description attributes for each variable 
LONGNAME = {} LONGNAME['SMB'] = "Cumulative SMB" LONGNAME['zsurf'] = "Height" LONGNAME['zfirn'] = "Compaction" LONGNAME['zmelt'] = "Surface Melt" LONGNAME['zsmb'] = "Surface Mass Balance" LONGNAME['zaccum'] = "Surface Accumulation" DESCRIPTION = {} DESCRIPTION['SMB'] = "Cumulative Surface Mass Balance" DESCRIPTION['zsurf'] = "Snow Height Change" DESCRIPTION['zfirn'] = "Snow Height Change due to Compaction" DESCRIPTION['zmelt'] = "Snow Height Change due to Surface Melt" DESCRIPTION['zsmb'] = "Snow Height Change due to Surface Mass Balance" DESCRIPTION['zaccum'] = "Snow Height Change due to Surface Accumulation" elif (MODEL == 'RACMO'): RACMO_VERSION,RACMO_MODEL=model_version.split('-') # netCDF4 variable names VARIABLES = ['hgtsrf'] # output variable keys KEYS = ['zsurf'] # HDF5 longname attributes for each variable LONGNAME = {} LONGNAME['zsurf'] = "Height" DESCRIPTION = {} DESCRIPTION['zsurf'] = "Snow Height Change" elif (MODEL == 'MERRA2-hybrid'): # regular expression pattern for extracting version merra2_regex = re.compile(r'GSFC-fdm-((v\d+)(\.\d+)?)$') # get MERRA-2 version and major version MERRA2_VERSION = merra2_regex.match(model_version).group(1) # MERRA-2 hybrid directory DIRECTORY=os.path.join(base_dir,'MERRA2_hybrid',MERRA2_VERSION) # MERRA-2 region name from ATL11 region MERRA2_REGION = dict(AA='ais',GL='gris')[REGION] # keyword arguments for MERRA-2 interpolation programs if MERRA2_VERSION in ('v0','v1','v1.0'): KWARGS['VERSION'] = merra2_regex.match(model_version).group(2) # netCDF4 variable names VARIABLES = ['FAC','cum_smb_anomaly','height'] # add additional Greenland variables if (MERRA2_REGION == 'gris'): VARIABLES.append('runoff_anomaly') else: KWARGS['VERSION'] = MERRA2_VERSION.replace('.','_') # netCDF4 variable names VARIABLES = ['FAC','SMB_a','h_a'] # add additional Greenland variables if (MERRA2_REGION == 'gris'): VARIABLES.append('Me_a') # use compressed files KWARGS['GZIP'] = GZIP # output variable keys KEYS = ['zsurf','zfirn','zsmb','zmelt'] # HDF5 longname and description attributes for each variable LONGNAME = {} LONGNAME['zsurf'] = "Height" LONGNAME['zfirn'] = "Compaction" LONGNAME['zsmb'] = "Surface Mass Balance" LONGNAME['zmelt'] = "Surface Melt" DESCRIPTION = {} DESCRIPTION['zsurf'] = "Snow Height Change" DESCRIPTION['zfirn'] = "Snow Height Change due to Compaction" DESCRIPTION['zsmb'] = "Snow Height Change due to Surface Mass Balance" DESCRIPTION['zmelt'] = "Snow Height Change due to Surface Melt" # pyproj transformer for converting from latitude/longitude # into polar stereographic coordinates crs1 = pyproj.CRS.from_string("epsg:{0:d}".format(4326)) crs2 = pyproj.CRS.from_string(proj4_params) transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True) # read each input beam pair within the file IS2_atl11_pairs = [] for ptx in [k for k in fileID.keys() if bool(re.match(r'pt\d',k))]: # check if subsetted beam contains reference points try: fileID[ptx]['ref_pt'] except KeyError: pass else: IS2_atl11_pairs.append(ptx) # copy variables for outputting to HDF5 file IS2_atl11_corr = {} IS2_atl11_fill = {} IS2_atl11_dims = {} IS2_atl11_corr_attrs = {} # number of GPS seconds between the GPS epoch (1980-01-06T00:00:00Z UTC) # and ATLAS Standard Data Product (SDP) epoch (2018-01-01T00:00:00Z UTC) # Add this value to delta time parameters to compute full gps_seconds IS2_atl11_corr['ancillary_data'] = {} IS2_atl11_corr_attrs['ancillary_data'] = {} for key in ['atlas_sdp_gps_epoch']: # get each HDF5 variable IS2_atl11_corr['ancillary_data'][key] = 
fileID['ancillary_data'][key][:] # Getting attributes of group and included variables IS2_atl11_corr_attrs['ancillary_data'][key] = {} for att_name,att_val in fileID['ancillary_data'][key].attrs.items(): IS2_atl11_corr_attrs['ancillary_data'][key][att_name] = att_val # HDF5 group name for across-track data XT = 'crossing_track_data' # for each input beam pair within the file for ptx in sorted(IS2_atl11_pairs): # output data dictionaries for beam IS2_atl11_corr[ptx] = dict(cycle_stats=collections.OrderedDict(), crossing_track_data=collections.OrderedDict()) IS2_atl11_fill[ptx] = dict(cycle_stats={},crossing_track_data={}) IS2_atl11_dims[ptx] = dict(cycle_stats={},crossing_track_data={}) IS2_atl11_corr_attrs[ptx] = dict(cycle_stats={},crossing_track_data={}) # extract along-track and across-track variables ref_pt = {} latitude = {} longitude = {} delta_time = {} groups = ['AT'] # dictionary with output variables OUTPUT = {} # number of average segments and number of included cycles # fill_value for invalid heights and corrections fv = fileID[ptx]['h_corr'].attrs['_FillValue'] # shape of along-track data n_points,n_cycles = fileID[ptx]['delta_time'][:].shape # along-track (AT) reference point, latitude, longitude and time ref_pt['AT'] = fileID[ptx]['ref_pt'][:].copy() latitude['AT'] = np.ma.array(fileID[ptx]['latitude'][:], fill_value=fileID[ptx]['latitude'].attrs['_FillValue']) latitude['AT'].mask = (latitude['AT'] == latitude['AT'].fill_value) longitude['AT'] = np.ma.array(fileID[ptx]['longitude'][:], fill_value=fileID[ptx]['longitude'].attrs['_FillValue']) longitude['AT'].mask = (longitude['AT'] == longitude['AT'].fill_value) delta_time['AT'] = np.ma.array(fileID[ptx]['delta_time'][:], fill_value=fileID[ptx]['delta_time'].attrs['_FillValue']) delta_time['AT'].mask = (delta_time['AT'] == delta_time['AT'].fill_value) # allocate for output height for along-track data OUTPUT['AT'] = {} for key in KEYS: OUTPUT['AT'][key] = np.ma.empty((n_points,n_cycles),fill_value=fv) OUTPUT['AT'][key].mask = np.ones((n_points,n_cycles),dtype=bool) OUTPUT['AT'][key].interpolation = np.zeros((n_points,n_cycles),dtype=np.uint8) # if running ATL11 crossovers if CROSSOVERS: # add to group groups.append('XT') # shape of across-track data n_cross, = fileID[ptx][XT]['delta_time'].shape # across-track (XT) reference point, latitude, longitude and time ref_pt['XT'] = fileID[ptx][XT]['ref_pt'][:].copy() latitude['XT'] = np.ma.array(fileID[ptx][XT]['latitude'][:], fill_value=fileID[ptx][XT]['latitude'].attrs['_FillValue']) latitude['XT'].mask = (latitude['XT'] == latitude['XT'].fill_value) longitude['XT'] = np.ma.array(fileID[ptx][XT]['longitude'][:], fill_value=fileID[ptx][XT]['longitude'].attrs['_FillValue']) latitude['XT'].mask = (latitude['XT'] == longitude['XT'].fill_value) delta_time['XT'] = np.ma.array(fileID[ptx][XT]['delta_time'][:], fill_value=fileID[ptx][XT]['delta_time'].attrs['_FillValue']) delta_time['XT'].mask = (delta_time['XT'] == delta_time['XT'].fill_value) # allocate for output height for across-track data OUTPUT['XT'] = {} for key in KEYS: OUTPUT['XT'][key] = np.ma.empty((n_cross),fill_value=fv) OUTPUT['XT'][key].mask = np.ones((n_cross),dtype=bool) OUTPUT['XT'][key].interpolation = np.zeros((n_cross),dtype=np.uint8) # extract lat/lon and convert to polar stereographic X,Y = transformer.transform(longitude['AT'],longitude['AT']) # for each valid cycle of ICESat-2 ATL11 data for c in range(n_cycles): # find valid elevations for cycle valid = np.logical_not(delta_time['AT'].mask[:,c]) i, = 
np.nonzero(valid) # convert time from ATLAS SDP to date in decimal-years tdec = convert_delta_time(delta_time['AT'][i,c])['decimal'] if (MODEL == 'MAR') and np.any(valid): # read and interpolate daily MAR outputs for key,var in zip(KEYS,VARIABLES): OUT = SMBcorr.interpolate_mar_daily(DIRECTORY, proj4_params, MAR_VERSION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS) # set attributes to output for iteration OUTPUT['AT'][key].data[i,c] = np.copy(OUT.data) OUTPUT['AT'][key].mask[i,c] = np.copy(OUT.mask) OUTPUT['AT'][key].interpolation[i,c] = np.copy(OUT.interpolation) # calculate derived fields OUTPUT['AT']['zsmb'].data[i,c] = OUTPUT['AT']['zsurf'].data[i,c] - \ OUTPUT['AT']['zfirn'].data[i,c] OUTPUT['AT']['zsmb'].mask[i,c] = OUTPUT['AT']['zsurf'].mask[i,c] | \ OUTPUT['AT']['zfirn'].mask[i,c] OUTPUT['AT']['zaccum'].data[i,c] = OUTPUT['AT']['zsurf'].data[i,c] - \ OUTPUT['AT']['zfirn'].data[i,c] - OUTPUT['AT']['zmelt'].data OUTPUT['AT']['zaccum'].mask[i,c] = OUTPUT['AT']['zsurf'].mask[i,c] | \ OUTPUT['AT']['zfirn'].mask[i,c] | OUTPUT['AT']['zmelt'].mask[i,c] elif (MODEL == 'RACMO') and np.any(valid): # read and interpolate daily RACMO outputs for key,var in zip(KEYS,VARIABLES): OUT = SMBcorr.interpolate_racmo_daily(base_dir, proj4_params, RACMO_MODEL, tdec, X[i], Y[i], VARIABLE=var, **KWARGS) # set attributes to output for iteration OUTPUT['AT'][key].data[i,c] = np.copy(OUT.data) OUTPUT['AT'][key].mask[i,c] = np.copy(OUT.mask) OUTPUT['AT'][key].interpolation[i,c] = np.copy(OUT.interpolation) elif (MODEL == 'MERRA2-hybrid') and np.any(valid): # read and interpolate 5-day MERRA2-Hybrid outputs for key,var in zip(KEYS,VARIABLES): OUT = SMBcorr.interpolate_merra_hybrid(DIRECTORY, proj4_params, MERRA2_REGION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS) # set attributes to output for iteration OUTPUT['AT'][key].data[i,c] = np.copy(OUT.data) OUTPUT['AT'][key].mask[i,c] = np.copy(OUT.mask) OUTPUT['AT'][key].interpolation[i,c] = np.copy(OUT.interpolation) #-- if interpolating to ATL11 crossover locations if CROSSOVERS: # extract lat/lon and convert to polar stereographic X,Y = transformer.transform(longitude['XT'],longitude['XT']) # find valid elevations for cycle valid = np.logical_not(delta_time['XT'].mask[:]) i, = np.nonzero(valid) # convert time from ATLAS SDP to date in decimal-years tdec = convert_delta_time(delta_time['XT'][i])['decimal'] if (MODEL == 'MAR') and np.any(valid): # read and interpolate daily MAR outputs for key,var in zip(KEYS,VARIABLES): OUT = SMBcorr.interpolate_mar_daily(DIRECTORY, proj4_params, MAR_VERSION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS) # set attributes to output for iteration OUTPUT['XT'][key].data[i] = np.copy(OUT.data) OUTPUT['XT'][key].mask[i] = np.copy(OUT.mask) OUTPUT['XT'][key].interpolation[i] = np.copy(OUT.interpolation) # calculate derived fields OUTPUT['XT']['zsmb'].data[i] = OUTPUT['XT']['zsurf'].data[i] - \ OUTPUT['XT']['zfirn'].data[i] OUTPUT['XT']['zsmb'].mask[i] = OUTPUT['XT']['zsurf'].mask[i] | \ OUTPUT['XT']['zfirn'].mask[i] OUTPUT['XT']['zaccum'].data[i] = OUTPUT['XT']['zsurf'].data[i] - \ OUTPUT['XT']['zfirn'].data[i] - OUTPUT['AT']['zmelt'].data[i] OUTPUT['XT']['zaccum'].mask[i] = OUTPUT['XT']['zsurf'].mask[i] | \ OUTPUT['XT']['zfirn'].mask[i] | OUTPUT['XT']['zmelt'].mask[i] elif (MODEL == 'RACMO') and np.any(valid): # read and interpolate daily RACMO outputs for key,var in zip(KEYS,VARIABLES): OUT = SMBcorr.interpolate_racmo_daily(base_dir, proj4_params, RACMO_MODEL, tdec, X[i], Y[i], VARIABLE=var, **KWARGS) # set attributes to output for iteration 
OUTPUT['XT'][key].data[i] = np.copy(OUT.data) OUTPUT['XT'][key].mask[i] = np.copy(OUT.mask) OUTPUT['XT'][key].interpolation[i] = np.copy(OUT.interpolation) elif (MODEL == 'MERRA2-hybrid') and np.any(valid): # read and interpolate 5-day MERRA2-Hybrid outputs for key,var in zip(KEYS,VARIABLES): OUT = SMBcorr.interpolate_merra_hybrid(DIRECTORY, proj4_params, MERRA2_REGION, tdec, X[i], Y[i], VARIABLE=var, **KWARGS) # set attributes to output for iteration OUTPUT['XT'][key].data[i] = np.copy(OUT.data) OUTPUT['XT'][key].mask[i] = np.copy(OUT.mask) OUTPUT['XT'][key].interpolation[i] = np.copy(OUT.interpolation) # group attributes for beam IS2_atl11_corr_attrs[ptx]['description'] = ('Contains the primary science parameters ' 'for this data set') IS2_atl11_corr_attrs[ptx]['beam_pair'] = fileID[ptx].attrs['beam_pair'] IS2_atl11_corr_attrs[ptx]['ReferenceGroundTrack'] = fileID[ptx].attrs['ReferenceGroundTrack'] IS2_atl11_corr_attrs[ptx]['first_cycle'] = fileID[ptx].attrs['first_cycle'] IS2_atl11_corr_attrs[ptx]['last_cycle'] = fileID[ptx].attrs['last_cycle'] IS2_atl11_corr_attrs[ptx]['equatorial_radius'] = fileID[ptx].attrs['equatorial_radius'] IS2_atl11_corr_attrs[ptx]['polar_radius'] = fileID[ptx].attrs['polar_radius'] # geolocation, time and reference point # reference point IS2_atl11_corr[ptx]['ref_pt'] = ref_pt['AT'].copy() IS2_atl11_fill[ptx]['ref_pt'] = None IS2_atl11_dims[ptx]['ref_pt'] = None IS2_atl11_corr_attrs[ptx]['ref_pt'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx]['ref_pt']['units'] = "1" IS2_atl11_corr_attrs[ptx]['ref_pt']['contentType'] = "referenceInformation" IS2_atl11_corr_attrs[ptx]['ref_pt']['long_name'] = "Reference point number" IS2_atl11_corr_attrs[ptx]['ref_pt']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx]['ref_pt']['description'] = ("The reference point is the " "7 digit segment_id number corresponding to the center of the ATL06 data used " "for each ATL11 point. These are sequential, starting with 1 for the first " "segment after an ascending equatorial crossing node.") IS2_atl11_corr_attrs[ptx]['ref_pt']['coordinates'] = \ "delta_time latitude longitude" # cycle_number IS2_atl11_corr[ptx]['cycle_number'] = fileID[ptx]['cycle_number'][:].copy() IS2_atl11_fill[ptx]['cycle_number'] = None IS2_atl11_dims[ptx]['cycle_number'] = None IS2_atl11_corr_attrs[ptx]['cycle_number'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx]['cycle_number']['units'] = "1" IS2_atl11_corr_attrs[ptx]['cycle_number']['long_name'] = "Orbital cycle number" IS2_atl11_corr_attrs[ptx]['cycle_number']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx]['cycle_number']['description'] = ("Number of 91-day periods " "that have elapsed since ICESat-2 entered the science orbit. Each of the 1,387 " "reference ground track (RGTs) is targeted in the polar regions once " "every 91 days.") # delta time IS2_atl11_corr[ptx]['delta_time'] = delta_time['AT'].copy() IS2_atl11_fill[ptx]['delta_time'] = delta_time['AT'].fill_value IS2_atl11_dims[ptx]['delta_time'] = ['ref_pt','cycle_number'] IS2_atl11_corr_attrs[ptx]['delta_time'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx]['delta_time']['units'] = "seconds since 2018-01-01" IS2_atl11_corr_attrs[ptx]['delta_time']['long_name'] = "Elapsed GPS seconds" IS2_atl11_corr_attrs[ptx]['delta_time']['standard_name'] = "time" IS2_atl11_corr_attrs[ptx]['delta_time']['calendar'] = "standard" IS2_atl11_corr_attrs[ptx]['delta_time']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx]['delta_time']['description'] = ("Number of GPS " "seconds since the ATLAS SDP epoch. 
The ATLAS Standard Data Products (SDP) epoch offset " "is defined within /ancillary_data/atlas_sdp_gps_epoch as the number of GPS seconds " "between the GPS epoch (1980-01-06T00:00:00.000000Z UTC) and the ATLAS SDP epoch. By " "adding the offset contained within atlas_sdp_gps_epoch to delta time parameters, the " "time in gps_seconds relative to the GPS epoch can be computed.") IS2_atl11_corr_attrs[ptx]['delta_time']['coordinates'] = \ "ref_pt cycle_number latitude longitude" # latitude IS2_atl11_corr[ptx]['latitude'] = latitude['AT'].copy() IS2_atl11_fill[ptx]['latitude'] = latitude['AT'].fill_value IS2_atl11_dims[ptx]['latitude'] = ['ref_pt'] IS2_atl11_corr_attrs[ptx]['latitude'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx]['latitude']['units'] = "degrees_north" IS2_atl11_corr_attrs[ptx]['latitude']['contentType'] = "physicalMeasurement" IS2_atl11_corr_attrs[ptx]['latitude']['long_name'] = "Latitude" IS2_atl11_corr_attrs[ptx]['latitude']['standard_name'] = "latitude" IS2_atl11_corr_attrs[ptx]['latitude']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx]['latitude']['description'] = ("Center latitude of " "selected segments") IS2_atl11_corr_attrs[ptx]['latitude']['valid_min'] = -90.0 IS2_atl11_corr_attrs[ptx]['latitude']['valid_max'] = 90.0 IS2_atl11_corr_attrs[ptx]['latitude']['coordinates'] = \ "ref_pt delta_time longitude" # longitude IS2_atl11_corr[ptx]['longitude'] = longitude['AT'].copy() IS2_atl11_fill[ptx]['longitude'] = longitude['AT'].fill_value IS2_atl11_dims[ptx]['longitude'] = ['ref_pt'] IS2_atl11_corr_attrs[ptx]['longitude'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx]['longitude']['units'] = "degrees_east" IS2_atl11_corr_attrs[ptx]['longitude']['contentType'] = "physicalMeasurement" IS2_atl11_corr_attrs[ptx]['longitude']['long_name'] = "Longitude" IS2_atl11_corr_attrs[ptx]['longitude']['standard_name'] = "longitude" IS2_atl11_corr_attrs[ptx]['longitude']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx]['longitude']['description'] = ("Center longitude of " "selected segments") IS2_atl11_corr_attrs[ptx]['longitude']['valid_min'] = -180.0 IS2_atl11_corr_attrs[ptx]['longitude']['valid_max'] = 180.0 IS2_atl11_corr_attrs[ptx]['longitude']['coordinates'] = \ "ref_pt delta_time latitude" # cycle statistics variables IS2_atl11_corr_attrs[ptx]['cycle_stats']['Description'] = ("The cycle_stats subgroup " "contains summary information about segments for each reference point, including " "the uncorrected mean heights for reference surfaces, blowing snow and cloud " "indicators, and geolocation and height misfit statistics.") IS2_atl11_corr_attrs[ptx]['cycle_stats']['data_rate'] = ("Data within this group " "are stored at the average segment rate.") # for each along-track dataset for key,val in OUTPUT['AT'].items(): # add to output IS2_atl11_corr[ptx]['cycle_stats'][key] = val.copy() IS2_atl11_fill[ptx]['cycle_stats'][key] = val.fill_value IS2_atl11_dims[ptx]['cycle_stats'][key] = ['ref_pt','cycle_number'] IS2_atl11_corr_attrs[ptx]['cycle_stats'][key] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['units'] = "meters" IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['contentType'] = "referenceInformation" IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['long_name'] = LONGNAME[key] IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['description'] = DESCRIPTION[key] IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['source'] = MODEL IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['reference'] = model_version IS2_atl11_corr_attrs[ptx]['cycle_stats'][key]['coordinates'] 
= \ "../ref_pt ../cycle_number ../delta_time ../latitude ../longitude" # if crossover measurements were calculated if CROSSOVERS: # crossing track variables IS2_atl11_corr_attrs[ptx][XT]['Description'] = ("The crossing_track_data " "subgroup contains elevation data at crossover locations. These are " "locations where two ICESat-2 pair tracks cross, so data are available " "from both the datum track, for which the granule was generated, and " "from the crossing track.") IS2_atl11_corr_attrs[ptx][XT]['data_rate'] = ("Data within this group are " "stored at the average segment rate.") # reference point IS2_atl11_corr[ptx][XT]['ref_pt'] = ref_pt['XT'].copy() IS2_atl11_fill[ptx][XT]['ref_pt'] = None IS2_atl11_dims[ptx][XT]['ref_pt'] = None IS2_atl11_corr_attrs[ptx][XT]['ref_pt'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['units'] = "1" IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['contentType'] = "referenceInformation" IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['long_name'] = ("fit center reference point number, " "segment_id") IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['source'] = "derived, ATL11 algorithm" IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['description'] = ("The reference-point number of the " "fit center for the datum track. The reference point is the 7 digit segment_id number " "corresponding to the center of the ATL06 data used for each ATL11 point. These are " "sequential, starting with 1 for the first segment after an ascending equatorial " "crossing node.") IS2_atl11_corr_attrs[ptx][XT]['ref_pt']['coordinates'] = \ "delta_time latitude longitude" # reference ground track of the crossing track IS2_atl11_corr[ptx][XT]['rgt'] = fileID[ptx][XT]['rgt'][:].copy() IS2_atl11_fill[ptx][XT]['rgt'] = fileID[ptx][XT]['rgt'].attrs['_FillValue'] IS2_atl11_dims[ptx][XT]['rgt'] = None IS2_atl11_corr_attrs[ptx][XT]['rgt'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx][XT]['rgt']['units'] = "1" IS2_atl11_corr_attrs[ptx][XT]['rgt']['contentType'] = "referenceInformation" IS2_atl11_corr_attrs[ptx][XT]['rgt']['long_name'] = "crossover reference ground track" IS2_atl11_corr_attrs[ptx][XT]['rgt']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx][XT]['rgt']['description'] = "The RGT number for the crossing data." IS2_atl11_corr_attrs[ptx][XT]['rgt']['coordinates'] = \ "ref_pt delta_time latitude longitude" # cycle_number of the crossing track IS2_atl11_corr[ptx][XT]['cycle_number'] = fileID[ptx][XT]['cycle_number'][:].copy() IS2_atl11_fill[ptx][XT]['cycle_number'] = fileID[ptx][XT]['cycle_number'].attrs['_FillValue'] IS2_atl11_dims[ptx][XT]['cycle_number'] = None IS2_atl11_corr_attrs[ptx][XT]['cycle_number'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['units'] = "1" IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['long_name'] = "crossover cycle number" IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx][XT]['cycle_number']['description'] = ("Cycle number for the " "crossing data. Number of 91-day periods that have elapsed since ICESat-2 entered " "the science orbit. 
Each of the 1,387 reference ground track (RGTs) is targeted " "in the polar regions once every 91 days.") # delta time of the crossing track IS2_atl11_corr[ptx][XT]['delta_time'] = delta_time['XT'].copy() IS2_atl11_fill[ptx][XT]['delta_time'] = delta_time['XT'].fill_value IS2_atl11_dims[ptx][XT]['delta_time'] = ['ref_pt'] IS2_atl11_corr_attrs[ptx][XT]['delta_time'] = {} IS2_atl11_corr_attrs[ptx][XT]['delta_time']['units'] = "seconds since 2018-01-01" IS2_atl11_corr_attrs[ptx][XT]['delta_time']['long_name'] = "Elapsed GPS seconds" IS2_atl11_corr_attrs[ptx][XT]['delta_time']['standard_name'] = "time" IS2_atl11_corr_attrs[ptx][XT]['delta_time']['calendar'] = "standard" IS2_atl11_corr_attrs[ptx][XT]['delta_time']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx][XT]['delta_time']['description'] = ("Number of GPS " "seconds since the ATLAS SDP epoch. The ATLAS Standard Data Products (SDP) epoch offset " "is defined within /ancillary_data/atlas_sdp_gps_epoch as the number of GPS seconds " "between the GPS epoch (1980-01-06T00:00:00.000000Z UTC) and the ATLAS SDP epoch. By " "adding the offset contained within atlas_sdp_gps_epoch to delta time parameters, the " "time in gps_seconds relative to the GPS epoch can be computed.") IS2_atl11_corr_attrs[ptx]['delta_time']['coordinates'] = \ "ref_pt latitude longitude" # latitude of the crossover measurement IS2_atl11_corr[ptx][XT]['latitude'] = latitude['XT'].copy() IS2_atl11_fill[ptx][XT]['latitude'] = latitude['XT'].fill_value IS2_atl11_dims[ptx][XT]['latitude'] = ['ref_pt'] IS2_atl11_corr_attrs[ptx][XT]['latitude'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx][XT]['latitude']['units'] = "degrees_north" IS2_atl11_corr_attrs[ptx][XT]['latitude']['contentType'] = "physicalMeasurement" IS2_atl11_corr_attrs[ptx][XT]['latitude']['long_name'] = "crossover latitude" IS2_atl11_corr_attrs[ptx][XT]['latitude']['standard_name'] = "latitude" IS2_atl11_corr_attrs[ptx][XT]['latitude']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx][XT]['latitude']['description'] = ("Center latitude of " "selected segments") IS2_atl11_corr_attrs[ptx][XT]['latitude']['valid_min'] = -90.0 IS2_atl11_corr_attrs[ptx][XT]['latitude']['valid_max'] = 90.0 IS2_atl11_corr_attrs[ptx][XT]['latitude']['coordinates'] = \ "ref_pt delta_time longitude" # longitude of the crossover measurement IS2_atl11_corr[ptx][XT]['longitude'] = longitude['XT'].copy() IS2_atl11_fill[ptx][XT]['longitude'] = longitude['XT'].fill_value IS2_atl11_dims[ptx][XT]['longitude'] = ['ref_pt'] IS2_atl11_corr_attrs[ptx][XT]['longitude'] = collections.OrderedDict() IS2_atl11_corr_attrs[ptx][XT]['longitude']['units'] = "degrees_east" IS2_atl11_corr_attrs[ptx][XT]['longitude']['contentType'] = "physicalMeasurement" IS2_atl11_corr_attrs[ptx][XT]['longitude']['long_name'] = "crossover longitude" IS2_atl11_corr_attrs[ptx][XT]['longitude']['standard_name'] = "longitude" IS2_atl11_corr_attrs[ptx][XT]['longitude']['source'] = "ATL06" IS2_atl11_corr_attrs[ptx][XT]['longitude']['description'] = ("Center longitude of " "selected segments") IS2_atl11_corr_attrs[ptx][XT]['longitude']['valid_min'] = -180.0 IS2_atl11_corr_attrs[ptx][XT]['longitude']['valid_max'] = 180.0 IS2_atl11_corr_attrs[ptx][XT]['longitude']['coordinates'] = \ "ref_pt delta_time latitude" # for each crossover dataset for key,val in OUTPUT['XT'].items(): # add to output IS2_atl11_corr[ptx][XT][key] = val.copy() IS2_atl11_fill[ptx][XT][key] = val.fill_value IS2_atl11_dims[ptx][XT][key] = ['ref_pt'] IS2_atl11_corr_attrs[ptx][XT][key] = collections.OrderedDict() 
IS2_atl11_corr_attrs[ptx][XT][key]['units'] = "meters" IS2_atl11_corr_attrs[ptx][XT][key]['contentType'] = "referenceInformation" IS2_atl11_corr_attrs[ptx][XT][key]['long_name'] = LONGNAME[key] IS2_atl11_corr_attrs[ptx][XT][key]['description'] = DESCRIPTION[key] IS2_atl11_corr_attrs[ptx][XT][key]['source'] = MODEL IS2_atl11_corr_attrs[ptx][XT][key]['reference'] = model_version IS2_atl11_corr_attrs[ptx][XT][key]['coordinates'] = \ "ref_pt delta_time latitude longitude" # output HDF5 files with interpolated surface mass balance data args = (PRD,model_version,TRK,GRAN,SCYC,ECYC,RL,VERS,AUX) file_format = '{0}_{1}_{2}{3}_{4}{5}_{6}_{7}{8}.h5' # print file information print('\t{0}'.format(file_format.format(*args))) if VERBOSE else None HDF5_ATL11_corr_write(IS2_atl11_corr, IS2_atl11_corr_attrs, CLOBBER=True, INPUT=os.path.basename(FILE), CROSSOVERS=CROSSOVERS, FILL_VALUE=IS2_atl11_fill, DIMENSIONS=IS2_atl11_dims, FILENAME=os.path.join(ddir,file_format.format(*args))) # change the permissions mode os.chmod(os.path.join(ddir,file_format.format(*args)), MODE)
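# --- illustration only; not part of the original ATL11 correction script ---
# A minimal sketch of how the derived cycle_stats fields above are formed:
# the surface mass balance height change is zsurf - zfirn, and a point is
# masked whenever either input is masked (logical OR of the masks). The
# values and the fill value below are made up for the demo.
import numpy as np

fv = -9999.0
zsurf = np.ma.array([0.12, 0.30, fv], mask=[False, False, True], fill_value=fv)
zfirn = np.ma.array([0.02, fv, 0.01], mask=[False, True, False], fill_value=fv)

zsmb = np.ma.array(zsurf.data - zfirn.data,
                   mask=(zsurf.mask | zfirn.mask), fill_value=fv)
# only the first element is valid (about 0.10 m); the other two stay masked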
version = info.get('version', '0.0.1') major_version, minor_version, _ = version.split('.', 2) major_version = int(major_version) minor_version = int(minor_version) name = 'trytond_stock_supply_day' download_url = 'http://downloads.tryton.org/%s.%s/' % (major_version, minor_version) if minor_version % 2: version = '%s.%s.dev0' % (major_version, minor_version) download_url = ('hg+http://hg.tryton.org/modules/%s#egg=%s-%s' % (name[8:], name, version)) requires = [] for dep in info.get('depends', []): if not re.match(r'(ir|res|webdav)(\W|$)', dep): requires.append(get_require_version('trytond_%s' % dep)) requires.append(get_require_version('trytond')) setup( name=name, version=version, description='Tryton module to add supply weekdays', long_description=read('README'), author='Tryton', author_email='*****@*****.**', url='http://www.tryton.org/', download_url=download_url, keywords='tryton supply day', package_dir={'trytond.modules.stock_supply_day': '.'}, packages=[
def normalize_tanchor(value): def normalize_single_tanchor(value, point='certain'): singlematch = re.compile("\(after ([^']+), before ([^']+)\)") if re.match(singlematch, value): singleout = singlematch.findall(value) if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', singleout[0][0]): after = datetime.strptime(singleout[0][0], '%Y-%m-%d') else: ba, bb, ea, after = normalize_time(singleout[0][0]) if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', singleout[0][1]): before = datetime.strptime(singleout[0][1], '%Y-%m-%d') else: before, bb, ea, eb = normalize_time(singleout[0][1]) return after, before elif 'after' in value: value = value.strip('after ') if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', value): return datetime.strptime(value, '%Y-%m-%d'), None else: ba, bb, ea, eb = normalize_time(value) return eb, None elif 'before' in value: value = value.strip('before ') if re.match(r'\d{4}-\d{1,2}-\d{1,2}$', value): return None, datetime.strptime(value, '%Y-%m-%d') else: ba, bb, ea, eb = normalize_time(value) return None, ba elif re.match(r'\d{4}-\d{1,2}-\d{1,2}$', value): after = datetime.strptime(value, '%Y-%m-%d') return after, after else: ## temporal code ba, bb, ea, eb = normalize_time(value) if point == 'begin': return ba, ba elif point == 'end': return eb, eb else: return ba, bb, ea, eb def normalize_multi_tanchor(value): # print(value) if 'freq' in value: multimatch = re.compile("\(begin:(.+), end:(.+), freq:(.+)\)") elif 'dur' in value: multimatch = re.compile("\(begin:(.+), end:(.+), dur:(.+)=\)") else: multimatch = re.compile("\(begin:(.+), end:(.+)\)") if re.match(multimatch, value): mout = multimatch.search(value) ba, bb = normalize_single_tanchor(mout.group(1), 'begin') ea, eb = normalize_single_tanchor(mout.group(2), 'end') return ba, bb, ea, eb if 'AND' in value or 'OR' in value: return None else: if 'dis=' in value: return None if re.match(r"\(begin:(.+), end:(.+)\)", value): return normalize_multi_tanchor(value) else: return normalize_single_tanchor(value)
def gagent_check_fstrim(self, test, params, env): """ Execute "guest-fstrim" command to guest agent :param test: kvm test object :param params: Dictionary with the test parameters :param env: Dictionary with test environment. """ def get_host_scsi_disk(): """ Get latest scsi disk which enulated by scsi_debug module Return the device name and the id in host """ scsi_disk_info = process.system_output( avo_path.find_command('lsscsi'), shell=True).splitlines() scsi_debug = [_ for _ in scsi_disk_info if 'scsi_debug' in _][-1] scsi_debug = scsi_debug.split() host_id = scsi_debug[0][1:-1] device_name = scsi_debug[-1] return (host_id, device_name) def get_guest_discard_disk(session): """ Get disk without partitions in guest. """ list_disk_cmd = "ls /dev/[sh]d*|sed 's/[0-9]//p'|uniq -u" disk = session.cmd_output(list_disk_cmd).splitlines()[0] return disk def get_provisioning_mode(device, host_id): """ Get disk provisioning mode, value usually is 'writesame_16', depends on params for scsi_debug module. """ device_name = os.path.basename(device) path = "/sys/block/%s/device/scsi_disk" % device_name path += "/%s/provisioning_mode" % host_id return utils.read_one_line(path).strip() def get_allocation_bitmap(): """ get block allocation bitmap """ path = "/sys/bus/pseudo/drivers/scsi_debug/map" try: return utils.read_one_line(path).strip() except IOError: logging.warn("could not get bitmap info, path '%s' is " "not exist", path) return "" for vm in env.get_all_vms(): if vm: vm.destroy() env.unregister_vm(vm.name) host_id, disk_name = get_host_scsi_disk() provisioning_mode = get_provisioning_mode(disk_name, host_id) logging.info("Current provisioning_mode = '%s'", provisioning_mode) bitmap = get_allocation_bitmap() if bitmap: logging.debug("block allocation bitmap: %s" % bitmap) raise error.TestError("block allocation bitmap" " not empty before test.") vm_name = params["main_vm"] test_image = "scsi_debug" params["start_vm"] = "yes" params["image_name_%s" % test_image] = disk_name params["image_format_%s" % test_image] = "raw" params["image_raw_device_%s" % test_image] = "yes" params["force_create_image_%s" % test_image] = "no" params["drive_format_%s" % test_image] = "scsi-block" params["drv_extra_params_%s" % test_image] = "discard=on" params["images"] = " ".join([params["images"], test_image]) error_context.context("boot guest with disk '%s'" % disk_name, logging.info) env_process.preprocess_vm(test, params, env, vm_name) self.setup(test, params, env) timeout = float(params.get("login_timeout", 240)) session = self.vm.wait_for_login(timeout=timeout) device_name = get_guest_discard_disk(session) error_context.context("format disk '%s' in guest" % device_name, logging.info) format_disk_cmd = params["format_disk_cmd"] format_disk_cmd = format_disk_cmd.replace("DISK", device_name) session.cmd(format_disk_cmd) error_context.context("mount disk with discard options '%s'" % device_name, logging.info) mount_disk_cmd = params["mount_disk_cmd"] mount_disk_cmd = mount_disk_cmd.replace("DISK", device_name) session.cmd(mount_disk_cmd) error_context.context("write the disk with dd command", logging.info) write_disk_cmd = params["write_disk_cmd"] session.cmd(write_disk_cmd) error_context.context("Delete the file created before on disk", logging.info) delete_file_cmd = params["delete_file_cmd"] session.cmd(delete_file_cmd) # check the bitmap before trim bitmap_before_trim = get_allocation_bitmap() if not re.match(r"\d+-\d+", bitmap_before_trim): raise error.TestFail("didn't get the bitmap of the target disk") 
error_context.context("the bitmap_before_trim is %s" % bitmap_before_trim, logging.info) total_block_before_trim = abs(sum([eval(i) for i in bitmap_before_trim.split(',')])) error_context.context("the total_block_before_trim is %d" % total_block_before_trim, logging.info) error_context.context("execute the guest-fstrim cmd", logging.info) self.gagent.fstrim() # check the bitmap after trim bitmap_after_trim = get_allocation_bitmap() if not re.match(r"\d+-\d+", bitmap_after_trim): raise error.TestFail("didn't get the bitmap of the target disk") error_context.context("the bitmap_after_trim is %s" % bitmap_after_trim, logging.info) total_block_after_trim = abs(sum([eval(i) for i in bitmap_after_trim.split(',')])) error_context.context("the total_block_after_trim is %d" % total_block_after_trim, logging.info) if total_block_after_trim > total_block_before_trim: raise error.TestFail("the bitmap_after_trim is lager, the command" " guest-fstrim may not work") if self.vm: self.vm.destroy()
def validate_path(cls, path):
    return re.match(r'^/[^><|&()?]*$', path)
def extractChapterUrlsAndMetadata(self): # fetch the chapter. From that we will get almost all the # metadata and chapter list url = self.url logger.debug("URL: " + url) # use BeautifulSoup HTML parser to make everything easier to find. try: data = self._fetchUrl(url) # non-existent/removed story urls get thrown to the front page. if "<h4>Featured Story</h4>" in data: raise exceptions.StoryDoesNotExist(self.url) soup = self.make_soup(data) except HTTPError as e: if e.code == 404: raise exceptions.StoryDoesNotExist(self.url) else: raise e # if blocked, attempt login. if soup.find("div", {"class": "blocked"}) or soup.find( "li", {"class": "blocked"}): if self.performLogin(url): # performLogin raises # FailedToLogin if it fails. soup = self.make_soup(self._fetchUrl(url, usecache=False)) divstory = soup.find('div', id='story') storya = divstory.find('a', href=re.compile("^/story/\d+$")) if storya: # if there's a story link in the divstory header, this is a chapter page. # normalize story URL on chapter list. self.story.setMetadata('storyId', storya['href'].split('/', )[2]) url = "https://" + self.getSiteDomain() + storya['href'] logger.debug("Normalizing to URL: " + url) self._setURL(url) try: soup = self.make_soup(self._fetchUrl(url)) except HTTPError as e: if e.code == 404: raise exceptions.StoryDoesNotExist(self.url) else: raise e # if blocked, attempt login. if soup.find("div", {"class": "blocked"}) or soup.find( "li", {"class": "blocked"}): if self.performLogin(url): # performLogin raises # FailedToLogin if it fails. soup = self.make_soup(self._fetchUrl(url, usecache=False)) # title - first h4 tag will be title. titleh4 = soup.find('div', {'class': 'storylist'}).find('h4') self.story.setMetadata('title', stripHTML(titleh4.a)) if 'Deleted story' in self.story.getMetadata('title'): raise exceptions.StoryDoesNotExist("This story was deleted. %s" % self.url) # Find authorid and URL from... author url. a = soup.find('span', { 'class': 'author' }).find('a', href=re.compile(r"^/a/")) self.story.setMetadata('authorId', a['href'].split('/')[2]) self.story.setMetadata('authorUrl', 'https://' + self.host + a['href']) self.story.setMetadata('author', a.string) # description storydiv = soup.find("div", {"id": "story"}) self.setDescription( url, storydiv.find("blockquote", { 'class': 'summary' }).p) #self.story.setMetadata('description', storydiv.find("blockquote",{'class':'summary'}).p.string) # most of the meta data is here: metap = storydiv.find("p", {"class": "meta"}) self.story.addToList( 'category', metap.find("a", href=re.compile(r"^/category/\d+")).string) # warnings # <span class="req"><a href="/help/38" title="Medium Spoilers">[!!] </a> <a href="/help/38" title="Rape/Sexual Violence">[R] </a> <a href="/help/38" title="Violence">[V] </a> <a href="/help/38" title="Child/Underage Sex">[Y] </a></span> spanreq = metap.find("span", {"class": "story-warnings"}) if spanreq: # can be no warnings. for a in spanreq.findAll("a"): self.story.addToList('warnings', a['title']) ## perhaps not the most efficient way to parse this, using ## regexps for each rather than something more complex, but ## IMO, it's more readable and amenable to change. metastr = stripHTML(unicode(metap)).replace('\n', ' ').replace( '\t', ' ').replace(u'\u00a0', ' ') m = re.match(r".*?Rating: (.+?) -.*?", metastr) if m: self.story.setMetadata('rating', m.group(1)) m = re.match(r".*?Genres: (.+?) -.*?", metastr) if m: for g in m.group(1).split(','): self.story.addToList('genre', g) m = re.match(r".*?Characters: (.*?) 
-.*?", metastr) if m: for g in m.group(1).split(','): if g: self.story.addToList('characters', g) m = re.match(r".*?Published: ([0-9-]+?) -.*?", metastr) if m: self.story.setMetadata('datePublished', makeDate(m.group(1), "%Y-%m-%d")) # Updated can have more than one space after it. <shrug> m = re.match(r".*?Updated: ([0-9-]+?) +-.*?", metastr) if m: self.story.setMetadata('dateUpdated', makeDate(m.group(1), "%Y-%m-%d")) m = re.match(r".*? - ([0-9,]+?) words.*?", metastr) if m: self.story.setMetadata('numWords', m.group(1)) if metastr.endswith("Complete"): self.story.setMetadata('status', 'Completed') else: self.story.setMetadata('status', 'In-Progress') # get the chapter list first this time because that's how we # detect the need to login. storylistul = soup.find('ul', {'class': 'storylist'}) if not storylistul: # no list found, so it's a one-chapter story. self.add_chapter(self.story.getMetadata('title'), url) else: chapterlistlis = storylistul.findAll('li') for chapterli in chapterlistlis: if "blocked" in chapterli['class']: # paranoia check. We should already be logged in by now. raise exceptions.FailedToLogin(url, self.username) else: #print "chapterli.h4.a (%s)"%chapterli.h4.a self.add_chapter( chapterli.h4.a.string, u'https://%s%s' % (self.getSiteDomain(), chapterli.h4.a['href'])) return
import re
import sys
import metricbeat
import unittest


@unittest.skipUnless(re.match("(?i)win|linux|darwin|freebsd", sys.platform), "os")
class Test(metricbeat.BaseTest):
    def test_drop_fields(self):
        self.render_config_template(
            modules=[{
                "name": "system",
                "metricsets": ["cpu"],
                "period": "1s"
            }],
            processors=[{
                "drop_fields": {
                    "when": "range.system.cpu.system.pct.lt: 0.1",
                    "fields": ["system.cpu.load"],
                },
            }])
        proc = self.start_beat()
        self.wait_until(lambda: self.output_lines() > 0)
        proc.check_kill_and_wait()

        output = self.read_output_json()
        self.assertEqual(len(output), 1)
        evt = output[0]
        self.assert_fields_are_documented(evt)
def validate_path(cls, path):
    return path == "/" or re.match(r'^[^/><|&()#?]+$', path)
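# --- illustration only ---
# A quick demonstration of the two validate_path variants above, with the
# class wrappers dropped so they run as plain functions. The first accepts
# absolute paths free of shell-unsafe characters; the second accepts "/"
# or a single path segment containing no slash.
import re

def validate_absolute(path):
    return re.match(r'^/[^><|&()?]*$', path)

def validate_segment(path):
    return path == "/" or re.match(r'^[^/><|&()#?]+$', path)

assert validate_absolute('/var/log/syslog')
assert not validate_absolute('relative/path')
assert validate_segment('docs')
assert not validate_segment('docs/readme')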
exit() # begin reading file header_counter = 0 message = "" input_file = open(sys.argv[1], 'r') output_file = open('results.xml', 'w+') output_file.write('<?xml version="1.0" encoding="UTF-8" ?>\n') output_file.write('<testsuites>\n\t<testsuite>\n') for line in input_file: line = line.rstrip() # check for header matchObj = re.match(r'\d+: (.+)!', line, re.M) if matchObj: # print message for previous issue if header_counter > 0: output_file.write('\t\t\t<failure message="%s">\n\t\t\t</failure>\n\t\t</testcase>\n' % message) # print header for current issue output_file.write('\t\t<testcase name="%s">\n' % matchObj.group(1)) message = "" header_counter += 1 else: message = message + "\n" + line # print message for last detected issue
def parse_line(line):
    match = re.match(FOOD_PATT, line)
    ingredients = [s.strip() for s in match.group(1).split()]
    alergens = [s.strip() for s in match.group(2).split(',')]
    return Food(ingredients, alergens)
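# --- illustration only ---
# parse_line above depends on FOOD_PATT and Food, which this fragment does
# not define. The definitions below are plausible stand-ins for an
# "ingredients (contains allergens)" style line; they are an assumption,
# not taken from the original source.
import re
from collections import namedtuple

Food = namedtuple('Food', ['ingredients', 'alergens'])
FOOD_PATT = re.compile(r'^(.*?)\s*\(contains\s+(.*)\)\s*$')

# parse_line("mxmxvkd kfcds sqjhc nhms (contains dairy, fish)")
# -> Food(ingredients=['mxmxvkd', 'kfcds', 'sqjhc', 'nhms'],
#         alergens=['dairy', 'fish'])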
import re

regex = u'откр(о|ы)(((т[^к])|(т$))|(в[^а]ш?)|й|л|е|ю|я)'
fileName = input("Enter the name of the file in which to find all forms of the verb 'открыть': ")
f = open(fileName, 'r', encoding='utf-8')
list_open = []  # list of the forms of the verb 'открыть' found so far
for r in f:
    r = r.lower()
    list_word = r.split(' ')
    for w in list_word:
        w = w.strip('.,:;-()?!%*\'\"\n\r\t\f\v')
        m = re.match(regex, w)  # matches the pattern at the start of the string
        if m is not None:
            if w not in list_open:  # check whether the form is already in the list
                list_open.append(w)
f.close()
print("Forms of the verb 'открыть':")
c = 0  # number of words found
for i in list_open:
    print(i)
    c = c + 1
print('Total: ' + str(c))
async def get_the_final(result: str) -> str: #def get_the_final(result: str) -> str: #获得五个回答 global msg msg = '' r_res = re.compile('(\d)\s+(\d)\s+(\d)\s+(\d)\s+(\d+)\W+(\d+)') m_res = re.match(r_res,result) a1,a2,a3,a4,minm,maxm = m_res.groups() url = 'https://www.yuque.com/api/v2/repos/385391/docs/2185141' url_head = {'User-Agent':'Mozilla/5.0','content-type':'application/json','X-Auth-Token':'{token}'} r = req.get(url,headers = url_head) r_content = r.text.replace('\\"','"') #创建分数统计列表 goal=[''] #创建+-统计 list1=[''] list2=[''] #计数 num = 1 pd_table = pd.read_html(r_content, encoding='utf-8', header=0)[0] PCindex = list(pd_table.index) for e_tr in PCindex: #初始化三个表 goal.append('') list1.append('') list2.append('') if pd_table.loc[e_tr,'内存升级'] != 0: if a1 != 1: list1[num] = list1[num]+'+' if a3 == 1: list1[num] = list1[num]+'+' if pd_table.loc[e_tr,'内存/G'] == 16: if a1 == 1: list2[num] = list2[num]+'-' if a1 == 3 or a1 == 4: list1[num] = list1[num]+'+' if a3 != 1: list1[num] = list1[num]+'+' if pd_table.loc[e_tr,'屏幕色域(数字为%)'] == 0: if a3 == 1: list2[num] += '-' if pd_table.loc[e_tr,'用途'] == 3: if a1 == 1: list2[num] += '-' if a2 == 1: list2[num] += '-' if a2 == 3: list2[num] += '+' if pd_table.loc[e_tr,'屏幕描述'] == 1 or pd_table.loc[e_tr,'屏幕描述'] == 2: if a1 == 3: list1[num] += '+' if a3 != 1: list1[num] += '+' if len(list1[num]) > 0: if len(list1[num]) > len(list2[num]): if int(minm) < int(pd_table.loc[e_tr,'参考售价/元']) and int(pd_table.loc[e_tr,'参考售价/元']) < int(maxm): msg += '\n'+'本款推荐:%s' % pd_table.loc[e_tr,'产品型号'] msg += '\n'+'处理器配置:%s' % pd_table.loc[e_tr,'处理器'] msg += '\n'+'色域:%s' % pd_table.loc[e_tr,'屏幕色域(数字为%)'] msg += '\n'+'参考售价:%d' % pd_table.loc[e_tr,'参考售价/元'] +'\n' goal[num] = list1[num] if len(list1[num]) == 1 and len(list2[num]) > 1: msg += '\n'+'本款推荐:%s' % pd_table.loc[e_tr,'产品型号'] msg += '\n'+'处理器配置:%s' % pd_table.loc[e_tr,'处理器'] msg += '\n'+'色域:%s' % pd_table.loc[e_tr,'屏幕色域(数字为%)'] msg += '\n'+'参考售价:%d' % pd_table.loc[e_tr,'参考售价/元']+'\n' else: goal[num]='' num += 1 try: return f'{msg}' except: return f'没有找到匹配的型号,尝试改变需求试试吧' #print(get_the_final("1 1 1 1 4000-5000"))
def _get_storage_config(self, host): pvs = self.dbapi.ipv_get_by_ihost(host.id) instance_backing = constants.LVG_NOVA_BACKING_IMAGE concurrent_disk_operations = constants.LVG_NOVA_PARAM_DISK_OPS_DEFAULT final_pvs = [] adding_pvs = [] removing_pvs = [] nova_lvg_uuid = None for pv in pvs: if (pv.lvm_vg_name == constants.LVG_NOVA_LOCAL and pv.pv_state != constants.PV_ERR): pv_path = pv.disk_or_part_device_path if (pv.pv_type == constants.PV_TYPE_PARTITION and '-part' not in pv.disk_or_part_device_path and '-part' not in pv.lvm_vg_name): # add the disk partition to the disk path partition_number = re.match('.*?([0-9]+)$', pv.lvm_pv_name).group(1) pv_path += "-part%s" % partition_number if (pv.pv_state == constants.PV_ADD): adding_pvs.append(pv_path) final_pvs.append(pv_path) elif(pv.pv_state == constants.PV_DEL): removing_pvs.append(pv_path) else: final_pvs.append(pv_path) nova_lvg_uuid = pv.ilvg_uuid if nova_lvg_uuid: lvg = self.dbapi.ilvg_get(nova_lvg_uuid) instance_backing = lvg.capabilities.get( constants.LVG_NOVA_PARAM_BACKING) concurrent_disk_operations = lvg.capabilities.get( constants.LVG_NOVA_PARAM_DISK_OPS) global_filter, update_filter = self._get_lvm_global_filter(host) values = { 'openstack::nova::storage::final_pvs': final_pvs, 'openstack::nova::storage::adding_pvs': adding_pvs, 'openstack::nova::storage::removing_pvs': removing_pvs, 'openstack::nova::storage::lvm_global_filter': global_filter, 'openstack::nova::storage::lvm_update_filter': update_filter, 'openstack::nova::storage::instance_backing': instance_backing, 'openstack::nova::storage::concurrent_disk_operations': concurrent_disk_operations, } # If NOVA is a service on a ceph-external backend, use the ephemeral_pool # and ceph_conf file that are stored in that DB entry. # If NOVA is not on any ceph-external backend, it must be on the internal # ceph backend with default "ephemeral" pool and default "/etc/ceph/ceph.conf" # config file sb_list = self.dbapi.storage_backend_get_list_by_type( backend_type=constants.SB_TYPE_CEPH_EXTERNAL) if sb_list: for sb in sb_list: if constants.SB_SVC_NOVA in sb.services: ceph_ext_obj = self.dbapi.storage_ceph_external_get(sb.id) images_rbd_pool = sb.capabilities.get('ephemeral_pool') images_rbd_ceph_conf = \ constants.CEPH_CONF_PATH + os.path.basename(ceph_ext_obj.ceph_conf) values.update({'openstack::nova::storage::images_rbd_pool': images_rbd_pool, 'openstack::nova::storage::images_rbd_ceph_conf': images_rbd_ceph_conf, }) return values
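# --- illustration only; not part of _get_storage_config above ---
# The partition branch above pulls the trailing digits off the LVM PV name
# and appends them to the by-path device path as a "-partN" suffix. A
# standalone sketch of that step with made-up example values:
import re

lvm_pv_name = '/dev/sda4'
disk_or_part_device_path = '/dev/disk/by-path/pci-0000:00:1f.2-ata-1'

partition_number = re.match('.*?([0-9]+)$', lvm_pv_name).group(1)
pv_path = disk_or_part_device_path + "-part%s" % partition_number
# pv_path -> '/dev/disk/by-path/pci-0000:00:1f.2-ata-1-part4'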
def DelugeUri(v):
    try:
        return re.match(r'(([^:]+):([^@]+)@([^:$]+)(:([0-9]+))?)', v).group(0)
    except AttributeError:
        # re.match returned None: the string did not have the expected form
        raise argparse.ArgumentTypeError("String '{}' does not match required format".format(v))
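# --- illustration only ---
# A sketch of how DelugeUri above would typically be wired into argparse as
# a custom type, validating strings of the form "user:pass@host[:port]".
# The --deluge option name and the example value are made up for the demo.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--deluge', type=DelugeUri,
                    help='connection string in user:pass@host[:port] form')
args = parser.parse_args(['--deluge', 'admin:secret@localhost:58846'])
# args.deluge -> 'admin:secret@localhost:58846'; a malformed value makes
# argparse exit with the ArgumentTypeError message raised above.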
def __call__(self, cfg, gpu_no): print("calling program with gpu "+str(gpu_no)) cmd = ['python3', self.program, '--cfg', str(cfg), str(gpu_no)] outs = "" #outputval = 0 outputval = "" try: outs = str(check_output(cmd,stderr=STDOUT, timeout=40000)) if os.path.isfile(logfile): with open(logfile,'a') as f_handle: f_handle.write(outs) else: with open(logfile,'w') as f_handle: f_handle.write(outs) outs = outs.split("\\n") #TODO_CHRIS hacky solution #outputval = 0 #for i in range(len(outs)-1,1,-1): for i in range(len(outs)-1,-1,-1): #if re.match("^\d+?\.\d+?$", outs[-i]) is None: #CHRIS changed outs[-i] to outs[i] print(outs[i]) if re.match("^\(\-?\d+\.?\d*\e?\+?\-?\d*\,\s\-?\d+\.?\d*\e?\+?\-?\d*\)$", outs[i]) is None: #do nothing a=1 else: #outputval = -1 * float(outs[-i]) outputval = outs[i] #if np.isnan(outputval): # outputval = 0 except subprocess.CalledProcessError as e: traceback.print_exc() print (e.output) except: print ("Unexpected error:") traceback.print_exc() print (outs) #outputval = 0 #TODO_CHRIS hacky solution tuple_str1 = '' tuple_str2 = '' success = True i = 1 try: while outputval[i] != ',': tuple_str1 += outputval[i] i += 1 i += 1 while outputval[i] != ')': tuple_str2 += outputval[i] i += 1 except: print("error in receiving answer from gpu " + str(gpu_no)) success = False try: tuple = (float(tuple_str1),float(tuple_str2),success) except: tuple = (0.0,0.0,False) #return outputval return tuple
return True else: print 'FAILED' print_result(expect_file, result_data, True) return False except Exception as e: print '\nFAILED:', e.message return False if __name__ == '__main__': passed = [] failed = [] disabled = [] for mvm, test_dir, test in load_tests(): if any(re.match(regex, test) for regex in disabled_tests): disabled.append(test) continue if run_test(mvm, test_dir, test): passed.append(test) else: failed.append(test) print '\n\n Summary:', len(passed), 'PASSED,', len(failed), 'FAILED,', len(disabled), 'DISABLED' print '\nPASSED:', passed print '\nFAILED:', failed print '\nDISABLED:', disabled print '\n\n'
def is_bucket_specified_in_domain_name(path, headers):
    host = headers.get('host', '')
    return re.match(r'.*s3(\-website)?\.([^\.]+\.)?amazonaws.com', host)
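# --- illustration only ---
# A quick check of the virtual-hosted-style detection above; the host
# values are made-up examples. re.match returns a match object for
# bucket-in-domain requests and None for path-style requests.
assert is_bucket_specified_in_domain_name(
    '/key', {'host': 'my-bucket.s3.amazonaws.com'})
assert not is_bucket_specified_in_domain_name(
    '/my-bucket/key', {'host': 'localhost:4566'})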
def forward_request(self, method, path, data, headers): # parse path and query params parsed_path = urlparse.urlparse(path) # Make sure we use 'localhost' as forward host, to ensure moto uses path style addressing. # Note that all S3 clients using LocalStack need to enable path style addressing. if 's3.amazonaws.com' not in headers.get('host', ''): headers['host'] = 'localhost' # check content md5 hash integrity if not a copy request if 'Content-MD5' in headers and not self.is_s3_copy_request(headers, path): response = check_content_md5(data, headers) if response is not None: return response modified_data = None # check bucket name bucket_name = get_bucket_name(path, headers) if method == 'PUT' and not re.match(BUCKET_NAME_REGEX, bucket_name): if len(parsed_path.path) <= 1: return error_response('Unable to extract valid bucket name. Please ensure that your AWS SDK is ' + 'configured to use path style addressing, or send a valid <Bucket>.s3.amazonaws.com "Host" header', 'InvalidBucketName', status_code=400) return error_response('The specified bucket is not valid.', 'InvalidBucketName', status_code=400) # TODO: For some reason, moto doesn't allow us to put a location constraint on us-east-1 to_find = to_bytes('<LocationConstraint>us-east-1</LocationConstraint>') if data and data.startswith(to_bytes('<')) and to_find in data: modified_data = data.replace(to_find, to_bytes('')) # If this request contains streaming v4 authentication signatures, strip them from the message # Related isse: https://github.com/localstack/localstack/issues/98 # TODO we should evaluate whether to replace moto s3 with scality/S3: # https://github.com/scality/S3/issues/237 if headers.get('x-amz-content-sha256') == 'STREAMING-AWS4-HMAC-SHA256-PAYLOAD': modified_data = strip_chunk_signatures(modified_data or data) headers['content-length'] = headers.get('x-amz-decoded-content-length') # POST requests to S3 may include a "${filename}" placeholder in the # key, which should be replaced with an actual file name before storing. 
if method == 'POST': original_data = modified_data or data expanded_data = multipart_content.expand_multipart_filename(original_data, headers) if expanded_data is not original_data: modified_data = expanded_data # If no content-type is provided, 'binary/octet-stream' should be used # src: https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html if method == 'PUT' and not headers.get('content-type'): headers['content-type'] = 'binary/octet-stream' # persist this API call to disk persistence.record('s3', method, path, data, headers) # parse query params query = parsed_path.query path = parsed_path.path bucket = path.split('/')[1] query_map = urlparse.parse_qs(query, keep_blank_values=True) # remap metadata query params (not supported in moto) to request headers append_metadata_headers(method, query_map, headers) if query == 'notification' or 'notification' in query_map: # handle and return response for ?notification request response = handle_notification_request(bucket, method, data) return response if query == 'cors' or 'cors' in query_map: if method == 'GET': return get_cors(bucket) if method == 'PUT': return set_cors(bucket, data) if method == 'DELETE': return delete_cors(bucket) if query == 'lifecycle' or 'lifecycle' in query_map: if method == 'GET': return get_lifecycle(bucket) if method == 'PUT': return set_lifecycle(bucket, data) if query == 'replication' or 'replication' in query_map: if method == 'GET': return get_replication(bucket) if method == 'PUT': return set_replication(bucket, data) if query == 'encryption' or 'encryption' in query_map: if method == 'GET': return get_encryption(bucket) if method == 'PUT': return set_encryption(bucket, data) if query == 'object-lock' or 'object-lock' in query_map: if method == 'GET': return get_object_lock(bucket) if method == 'PUT': return set_object_lock(bucket, data) if modified_data is not None: return Request(data=modified_data, headers=headers, method=method) return True
def pm_button_callback(_, __, query):
    if re.match('engine_pm', query.data):
        return True
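# --- illustration only ---
# pm_button_callback above is shaped like a custom callback-query filter
# (e.g. for a Pyrogram bot): it receives the filter, the client and the
# update, and matches any callback whose data starts with "engine_pm".
# A library-free check of that behaviour with a stand-in query object:
from types import SimpleNamespace

assert pm_button_callback(None, None, SimpleNamespace(data='engine_pm_settings'))
assert not pm_button_callback(None, None, SimpleNamespace(data='other_button'))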
def __init__(self, inputList, exactMatches={}, patternMatches={}): defaultArgExactMatches = { '-o' : (1, ArgumentListFilter.outputFileCallback), '-c' : (0, ArgumentListFilter.compileOnlyCallback), '-E' : (0, ArgumentListFilter.preprocessOnlyCallback), '-S' : (0, ArgumentListFilter.assembleOnlyCallback), '--verbose' : (0, ArgumentListFilter.verboseFlagCallback), '--param' : (1, ArgumentListFilter.defaultBinaryCallback), '-aux-info' : (1, ArgumentListFilter.defaultBinaryCallback), #iam: presumably the len(inputFiles) == 0 in this case '--version' : (0, ArgumentListFilter.compileOnlyCallback), '-v' : (0, ArgumentListFilter.compileOnlyCallback), #warnings (apart from the regex below) '-w' : (0, ArgumentListFilter.compileOnlyCallback), '-W' : (0, ArgumentListFilter.compileOnlyCallback), #iam: if this happens, then we need to stop and think. '-emit-llvm' : (0, ArgumentListFilter.emitLLVMCallback), #iam: buildworld and buildkernel use these flags '-pipe' : (0, ArgumentListFilter.compileUnaryCallback), '-undef' : (0, ArgumentListFilter.compileUnaryCallback), '-nostdinc' : (0, ArgumentListFilter.compileUnaryCallback), '-nostdinc++' : (0, ArgumentListFilter.compileUnaryCallback), '-Qunused-arguments' : (0, ArgumentListFilter.compileUnaryCallback), '-no-integrated-as' : (0, ArgumentListFilter.compileUnaryCallback), '-integrated-as' : (0, ArgumentListFilter.compileUnaryCallback), #iam: gcc uses this in both compile and link, but clang only in compile '-pthread' : (0, ArgumentListFilter.compileUnaryCallback), # I think this is a compiler search path flag. It is # clang only, so I don't think it counts as a separate CPP # flag. Android uses this flag with its clang builds. '-nostdlibinc': (0, ArgumentListFilter.compileUnaryCallback), #iam: arm stuff '-mno-omit-leaf-frame-pointer' : (0, ArgumentListFilter.compileUnaryCallback), '-maes' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-aes' : (0, ArgumentListFilter.compileUnaryCallback), '-mavx' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-avx' : (0, ArgumentListFilter.compileUnaryCallback), '-mcmodel=kernel' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-red-zone' : (0, ArgumentListFilter.compileUnaryCallback), '-mmmx' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-mmx' : (0, ArgumentListFilter.compileUnaryCallback), '-msse' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-sse2' : (0, ArgumentListFilter.compileUnaryCallback), '-msse2' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-sse3' : (0, ArgumentListFilter.compileUnaryCallback), '-msse3' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-sse' : (0, ArgumentListFilter.compileUnaryCallback), '-msoft-float' : (0, ArgumentListFilter.compileUnaryCallback), '-m3dnow' : (0, ArgumentListFilter.compileUnaryCallback), '-mno-3dnow' : (0, ArgumentListFilter.compileUnaryCallback), '-m32': (0, ArgumentListFilter.compileUnaryCallback), '-m64': (0, ArgumentListFilter.compileUnaryCallback), '-mstackrealign': (0, ArgumentListFilter.compileUnaryCallback), # Preprocessor assertion '-A' : (1, ArgumentListFilter.compileBinaryCallback), '-D' : (1, ArgumentListFilter.compileBinaryCallback), '-U' : (1, ArgumentListFilter.compileBinaryCallback), # Dependency generation '-M' : (0, ArgumentListFilter.dependencyOnlyCallback), '-MM' : (0, ArgumentListFilter.dependencyOnlyCallback), '-MF' : (1, ArgumentListFilter.dependencyBinaryCallback), '-MG' : (0, ArgumentListFilter.dependencyOnlyCallback), '-MP' : (0, ArgumentListFilter.dependencyOnlyCallback), '-MT' : (1, 
ArgumentListFilter.dependencyBinaryCallback), '-MQ' : (1, ArgumentListFilter.dependencyBinaryCallback), '-MD' : (0, ArgumentListFilter.dependencyOnlyCallback), '-MMD' : (0, ArgumentListFilter.dependencyOnlyCallback), # Include '-I' : (1, ArgumentListFilter.compileBinaryCallback), '-idirafter' : (1, ArgumentListFilter.compileBinaryCallback), '-include' : (1, ArgumentListFilter.compileBinaryCallback), '-imacros' : (1, ArgumentListFilter.compileBinaryCallback), '-iprefix' : (1, ArgumentListFilter.compileBinaryCallback), '-iwithprefix' : (1, ArgumentListFilter.compileBinaryCallback), '-iwithprefixbefore' : (1, ArgumentListFilter.compileBinaryCallback), '-isystem' : (1, ArgumentListFilter.compileBinaryCallback), '-isysroot' : (1, ArgumentListFilter.compileBinaryCallback), '-iquote' : (1, ArgumentListFilter.compileBinaryCallback), '-imultilib' : (1, ArgumentListFilter.compileBinaryCallback), # Language '-ansi' : (0, ArgumentListFilter.compileUnaryCallback), '-pedantic' : (0, ArgumentListFilter.compileUnaryCallback), '-x' : (1, ArgumentListFilter.compileBinaryCallback), # Debug '-g' : (0, ArgumentListFilter.compileUnaryCallback), '-g0' : (0, ArgumentListFilter.compileUnaryCallback), #iam: clang not gcc '-ggdb' : (0, ArgumentListFilter.compileUnaryCallback), '-ggdb3' : (0, ArgumentListFilter.compileUnaryCallback), '-gdwarf-2' : (0, ArgumentListFilter.compileUnaryCallback), '-gdwarf-3' : (0, ArgumentListFilter.compileUnaryCallback), '-gline-tables-only' : (0, ArgumentListFilter.compileUnaryCallback), '-p' : (0, ArgumentListFilter.compileUnaryCallback), '-pg' : (0, ArgumentListFilter.compileUnaryCallback), # Optimization '-O' : (0, ArgumentListFilter.compileUnaryCallback), '-O0' : (0, ArgumentListFilter.compileUnaryCallback), '-O1' : (0, ArgumentListFilter.compileUnaryCallback), '-O2' : (0, ArgumentListFilter.compileUnaryCallback), '-O3' : (0, ArgumentListFilter.compileUnaryCallback), '-Os' : (0, ArgumentListFilter.compileUnaryCallback), '-Ofast' : (0, ArgumentListFilter.compileUnaryCallback), '-Og' : (0, ArgumentListFilter.compileUnaryCallback), # Component-specifiers '-Xclang' : (1, ArgumentListFilter.compileBinaryCallback), '-Xpreprocessor' : (1, ArgumentListFilter.defaultBinaryCallback), '-Xassembler' : (1, ArgumentListFilter.defaultBinaryCallback), '-Xlinker' : (1, ArgumentListFilter.defaultBinaryCallback), # Linker '-l' : (1, ArgumentListFilter.linkBinaryCallback), '-L' : (1, ArgumentListFilter.linkBinaryCallback), '-T' : (1, ArgumentListFilter.linkBinaryCallback), '-u' : (1, ArgumentListFilter.linkBinaryCallback), #iam: specify the entry point '-e' : (1, ArgumentListFilter.linkBinaryCallback), # runtime library search path '-rpath' : (1, ArgumentListFilter.linkBinaryCallback), # iam: showed up in buildkernel '-shared' : (0, ArgumentListFilter.linkUnaryCallback), '-static' : (0, ArgumentListFilter.linkUnaryCallback), '-pie' : (0, ArgumentListFilter.linkUnaryCallback), '-nostdlib' : (0, ArgumentListFilter.linkUnaryCallback), '-nodefaultlibs' : (0, ArgumentListFilter.linkUnaryCallback), '-rdynamic' : (0, ArgumentListFilter.linkUnaryCallback), # darwin flags '-dynamiclib' : (0, ArgumentListFilter.linkUnaryCallback), '-current_version' : (1, ArgumentListFilter.linkBinaryCallback), '-compatibility_version' : (1, ArgumentListFilter.linkBinaryCallback), # dragonegg mystery argument '--64' : (0, ArgumentListFilter.compileUnaryCallback), # binutils nonsense '-print-multi-directory' : (0, ArgumentListFilter.compileUnaryCallback), '-print-multi-lib' : (0, ArgumentListFilter.compileUnaryCallback), 
'-print-libgcc-file-name' : (0, ArgumentListFilter.compileUnaryCallback), # Code coverage instrumentation '-fprofile-arcs' : (0, ArgumentListFilter.compileLinkUnaryCallback), '-coverage' : (0, ArgumentListFilter.compileLinkUnaryCallback), '--coverage' : (0, ArgumentListFilter.compileLinkUnaryCallback), # # BD: need to warn the darwin user that these flags will rain on their parade # (the Darwin ld is a bit single minded) # # 1) compilation with -fvisibility=hidden causes trouble when we try to # attach bitcode filenames to an object file. The global symbols in object # files get turned into local symbols when we invoke 'ld -r' # # 2) all stripping commands (e.g., -dead_strip) remove the __LLVM segment after # linking # # Update: found a fix for problem 1: add flag -keep_private_externs when # calling ld -r. # '-Wl,-dead_strip' : (0, ArgumentListFilter.darwinWarningLinkUnaryCallback), } # # Patterns for other command-line arguments: # - inputFiles # - objectFiles (suffix .o) # - libraries + linker options as in -lxxx -Lpath or -Wl,xxxx # - preprocessor options as in -DXXX -Ipath # - compiler warning options: -W.... # - optimiziation and other flags: -f... # defaultArgPatterns = { r'^.+\.(c|cc|cpp|C|cxx|i|s|S|bc)$' : (0, ArgumentListFilter.inputFileCallback), # FORTRAN file types r'^.+\.([fF](|[0-9][0-9]|or|OR|pp|PP))$' : (0, ArgumentListFilter.inputFileCallback), #iam: the object file recogition is not really very robust, object files # should be determined by their existance and contents... r'^.+\.(o|lo|So|so|po|a|dylib)$' : (0, ArgumentListFilter.objectFileCallback), #iam: library.so.4.5.6 probably need a similar pattern for .dylib too. r'^.+\.dylib(\.\d)+$' : (0, ArgumentListFilter.objectFileCallback), r'^.+\.(So|so)(\.\d)+$' : (0, ArgumentListFilter.objectFileCallback), r'^-(l|L).+$' : (0, ArgumentListFilter.linkUnaryCallback), r'^-I.+$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-D.+$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-U.+$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-Wl,.+$' : (0, ArgumentListFilter.linkUnaryCallback), r'^-W(?!l,).*$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-f.+$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-rtlib=.+$' : (0, ArgumentListFilter.linkUnaryCallback), r'^-std=.+$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-stdlib=.+$' : (0, ArgumentListFilter.compileLinkUnaryCallback), r'^-mtune=.+$' : (0, ArgumentListFilter.compileUnaryCallback), r'^--sysroot=.+$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-print-prog-name=.*$' : (0, ArgumentListFilter.compileUnaryCallback), r'^-print-file-name=.*$' : (0, ArgumentListFilter.compileUnaryCallback), } #iam: try and keep track of the files, input object, and output self.inputList = inputList self.inputFiles = [] self.objectFiles = [] self.outputFilename = None #iam: try and split the args into linker and compiler switches self.compileArgs = [] self.linkArgs = [] self.isVerbose = False self.isDependencyOnly = False self.isPreprocessOnly = False self.isAssembleOnly = False self.isAssembly = False self.isCompileOnly = False self.isEmitLLVM = False argExactMatches = dict(defaultArgExactMatches) argExactMatches.update(exactMatches) argPatterns = dict(defaultArgPatterns) argPatterns.update(patternMatches) self._inputArgs = collections.deque(inputList) #iam: parse the cmd line, bailing if we discover that there will be no second phase. 
while (self._inputArgs and not (self.isAssembly or self.isAssembleOnly or self.isPreprocessOnly)): # Get the next argument currentItem = self._inputArgs.popleft() _logger.debug('Trying to match item ' + currentItem) # First, see if this exact flag has a handler in the table. # This is a cheap test. Otherwise, see if the input matches # some pattern with a handler that we recognize if currentItem in argExactMatches: (arity, handler) = argExactMatches[currentItem] flagArgs = self._shiftArgs(arity) handler(self, currentItem, *flagArgs) else: matched = False for pattern, (arity, handler) in argPatterns.items(): if re.match(pattern, currentItem): flagArgs = self._shiftArgs(arity) handler(self, currentItem, *flagArgs) matched = True break # If no action has been specified, this is a zero-argument # flag that we should just keep. if not matched: _logger.warning('Did not recognize the compiler flag "%s"', currentItem) self.compileUnaryCallback(currentItem) if DUMPING: self.dump()
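# --- illustration only; a condensed model of the argument dispatch above ---
# The __init__ above walks the argument list with a deque, first checking
# an exact-match table and then a regex-pattern table, where each entry
# records how many following arguments its handler consumes. The toy
# tables below are made up; they only show the shape of the mechanism.
import collections
import re

def classify(args):
    exact = {'-o': 1, '-c': 0}
    patterns = {r'^-I.+$': 0, r'^-W(?!l,).*$': 0}
    queue = collections.deque(args)
    seen = []
    while queue:
        item = queue.popleft()
        if item in exact:
            seen.append((item, [queue.popleft() for _ in range(exact[item])]))
        else:
            for patt, arity in patterns.items():
                if re.match(patt, item):
                    seen.append((item, [queue.popleft() for _ in range(arity)]))
                    break
            else:
                seen.append((item, []))
    return seen

# classify(['-c', 'foo.c', '-I/usr/include', '-o', 'foo.o'])
# -> [('-c', []), ('foo.c', []), ('-I/usr/include', []), ('-o', ['foo.o'])]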
lines_out = [] lines_out.append('# Лабораторная работа №' + sys.argv[2]) code_flag = False for i in lines: s = i if(i.strip() == ''): continue elif(s.strip()[0] == '#'): if(code_flag): lines_out.append('```') code_flag = False if(re.match(r'^#[0-9]+$', s.strip()) == None): lines_out.append(s.strip()[1:]) else: lines_out.append('## ' + s.strip()[1:]) elif(re.match(r'^.*#[0-9]+$', s) == None): if(code_flag == False): lines_out.append('```shell') code_flag = True lines_out.append(i) else: s = s.split('#') if(code_flag): lines_out.append('```') code_flag = False lines_out.append('## ' + s[-1].strip()) lines_out.append('```shell')
def main(meshfile,file,iexpt=10,iversn=22,yrflag=3,bio_path=None) : # # Trim input netcdf file name being appropriate for reading # meshfile=str(meshfile)[2:-2] logger.info("Reading mesh information from %s."%(meshfile)) # # Read mesh file containing grid and coordinate information. # Note that for now, we are using T-grid in vertical which may need # to be improved by utilizing W-point along the vertical axis. # hdept,gdept,mbathy,mbathy_u,mbathy_v,mask,e3t,plon,plat=read_grid(meshfile) logger.warning("Reading grid information from regional.grid.[ab] (not completed)") # # Convert from P-point (i.e. NEMO grid) to U and V HYCOM grids # mask_u=p2u_2d(mask) mask_v=p2v_2d(mask) # # Read regional.grid.[ab] # Grid angle is not used for this product because all quantities are # on regular rectangular grid points. # angle=numpy.zeros(plon.shape) # # Number vertical layers in T-point. # nlev=gdept.size # # layer thickness in the absence of layer partial steps. # dt = gdept[1:] - gdept[:-1] # # Prepare/read input data file (in netcdf format). Reference time is 1950-01-01 # logger.info("Reading data files.") file=str(file).strip()[2:-2] dirname=os.path.dirname(file) logger.debug("file name is {}".format(file)) logger.debug("dirname is {}".format(dirname)) logger.debug("basename is {}".format(os.path.basename(file))) m=re.match("(MERCATOR-PHY-24-)(.*\.nc)",os.path.basename(file)) logger.debug("file prefix is {}".format(file_pre)) ### m=re.match(file_pre,os.path.basename(file)) if not m: msg="File %s is not a grid2D file, aborting"%file logger.error(msg) raise ValueError(msg) #fileinput0=os.path.join(dirname+"/"+"MERCATOR-PHY-24-"+m.group(2)) file_date=file[-16:-6] fileinput0=file print((file_date,file)) next_day=datetime.datetime.strptime(file_date, '%Y-%m-%d')+datetime.timedelta(days=1) fileinput1=datetime.datetime.strftime(next_day,'%Y%m%d') fileinput1=os.path.join(dirname+"/"+file_pre+fileinput1+'.nc') logger.info("Reading from %s"%(fileinput0)) ncid0=netCDF4.Dataset(fileinput0,"r") if timeavg_method==1 and os.path.isfile(fileinput1) : logger.info("timeavg_method=1, Reading from %s"%(fileinput1)) ncid1=netCDF4.Dataset(fileinput1,"r") # # Calculate temporal averaged temperature, salinity, and velocity # uo = 0.5*(ncid0.variables["uo"][0,:,:,:]+ ncid1.variables["uo"][0,:,:,:]) vo = 0.5*(ncid0.variables["vo"][0,:,:,:]+ ncid1.variables["vo"][0,:,:,:]) salt = 0.5*(ncid0.variables["so"][0,:,:,:]+ ncid1.variables["so"][0,:,:,:]) temp = 0.5*(ncid0.variables["thetao"][0,:,:,:]+ncid1.variables["thetao"][0,:,:,:]) ssh = numpy.squeeze(0.5*(ncid0.variables["zos"][0,:,:]+ncid1.variables["zos"][0,:,:])) else: # # Set variables based on current file when timeavg_method ~=1 or the next netcdf file is not available logger.debug("time average method set to {}".format(timeavg_method)) uo = ncid0.variables["uo"][0,:,:,:] vo = ncid0.variables["vo"][0,:,:,:] salt = ncid0.variables["so"][0,:,:,:] temp = ncid0.variables["thetao"][0,:,:,:] ssh = numpy.squeeze(ncid0.variables["zos"][0,:,:]) # # I will account these values afterward. Because in the current version, I am accounting for missing values using a gap-filling methodology. 
# logger.debug("getting _FillValue") uofill=ncid0.variables["uo"]._FillValue vofill=ncid0.variables["vo"]._FillValue slfill=ncid0.variables["so"]._FillValue tlfill=ncid0.variables["thetao"]._FillValue shfill=ncid0.variables["zos"]._FillValue # Set time logger.info("Set time.") time=ncid0.variables["time"][0] unit=ncid0.variables["time"].units tmp=cfunits.Units(unit) refy,refm,refd=(1950,1,1) tmp2=cfunits.Units("hours since %d-%d-%d 00:00:00"%(refy,refm,refd)) tmp3=int(numpy.round(cfunits.Units.conform(time,tmp,tmp2))) mydt = datetime.datetime(refy,refm,refd,0,0,0) + datetime.timedelta(hours=tmp3) # Then calculate dt. Phew! if timeavg_method==1 and os.path.isfile(fileinput1) : fnametemplate="archv.%Y_%j_%H" deltat=datetime.datetime(refy,refm,refd,0,0,0) + \ datetime.timedelta(hours=tmp3) + \ datetime.timedelta(hours=12) oname=deltat.strftime(fnametemplate) else: # # I am assuming that daily mean can be set at 00 instead of 12 # for cases that there is no information of next day. # fnametemplate="archv.%Y_%j" deltat=datetime.datetime(refy,refm,refd,0,0,0) + \ datetime.timedelta(hours=tmp3) oname=deltat.strftime(fnametemplate) + '_00' # model day refy, refm, refd=(1900,12,31) model_day= deltat-datetime.datetime(refy,refm,refd,0,0,0) model_day=model_day.days logger.info("Model day in HYCOM is %s"%str(model_day)) if bio_path: jdm,idm=numpy.shape(plon) points = numpy.transpose(((plat.flatten(),plon.flatten()))) delta = mydt.strftime( '%Y-%m-%d') # filename format MERCATOR-BIO-14-2013-01-05-00 print((bio_path,delta)) idx,biofname=search_biofile(bio_path,delta) if idx >7: msg="No available BIO file within a week difference with PHY" logger.error(msg) raise ValueError(msg) logger.info("BIO file %s reading & interpolating to 1/12 deg grid cells ..."%biofname) ncidb=netCDF4.Dataset(biofname,"r") blon=ncidb.variables["longitude"][:]; blat=ncidb.variables["latitude"][:] minblat=blat.min() no3=ncidb.variables["NO3"][0,:,:,:]; no3[numpy.abs(no3)>1e+10]=numpy.nan po4=ncidb.variables["PO4"][0,:,:,:] si=ncidb.variables["Si"][0,:,:,:] po4[numpy.abs(po4)>1e+10]=numpy.nan si[numpy.abs(si)>1e+10]=numpy.nan # TODO: Ineed to improve this part nz,ny,nx=no3.shape dummy=numpy.zeros((nz,ny,nx+1)) dummy[:,:,:nx]=no3;dummy[:,:,-1]=no3[:,:,-1] no3=dummy dummy=numpy.zeros((nz,ny,nx+1)) dummy[:,:,:nx]=po4;dummy[:,:,-1]=po4[:,:,-1] po4=dummy dummy=numpy.zeros((nz,ny,nx+1)) dummy[:,:,:nx]=si;dummy[:,:,-1]=si[:,:,-1] si=dummy dummy=numpy.zeros((nx+1)) dummy[:nx]=blon blon=dummy blon[-1]=-blon[0] # TODO: Note that the coordinate files are for global configuration while # the data file saved for latitude larger than 30. 
In the case you change your data file coordinate # configuration you need to modify the following lines bio_coordfile=bio_path[:-4]+"/GLOBAL_ANALYSIS_FORECAST_BIO_001_014_COORD/GLO-MFC_001_014_mask.nc" biocrd=netCDF4.Dataset(bio_coordfile,"r") blat2 = biocrd.variables['latitude'][:] index=numpy.where(blat2>=minblat)[0] depth_lev = biocrd.variables['deptho_lev'][index[0]:,:] # # # dummy=numpy.zeros((ny,nx+1)) dummy[:,:nx]=depth_lev;dummy[:,-1]=depth_lev[:,-1] depth_lev=dummy depth_lev[depth_lev>50]=0 depth_lev=depth_lev.astype('i') dummy_no3=no3 dummy_po4=po4 dummy_si=si for j in range(ny): for i in range(nx): dummy_no3[depth_lev[j,i]:nz-2,j,i]=no3[depth_lev[j,i]-1,j,i] dummy_po4[depth_lev[j,i]:nz-2,j,i]=po4[depth_lev[j,i]-1,j,i] dummy_si[depth_lev[j,i]:nz-2,j,i]=si[depth_lev[j,i]-1,j,i] no3=dummy_no3 po4=dummy_po4 si=dummy_si # po4 = po4 * 106.0 * 12.01 si = si * 6.625 * 12.01 no3 = no3 * 6.625 * 12.01 logger.info("Read, trim, rotate NEMO velocities.") u=numpy.zeros((nlev,mbathy.shape[0],mbathy.shape[1])) v=numpy.zeros((nlev,mbathy.shape[0],mbathy.shape[1])) utmp=numpy.zeros((mbathy.shape)) vtmp=numpy.zeros((mbathy.shape)) # # Metrices to detect carrefully bottom at p-, u-, and v-grid points.While I have used 3D, mask data,following methods are good enough for now. # if mbathy_method == 1 : ip = mbathy == -1 iu = mbathy_u == -1 iv = mbathy_v == -1 else: ip = mask == 0 iu = mask_u == 0 iv = mask_v == 0 # # Read 3D velocity field to calculate barotropic velocity # # Estimate barotropic velocities using partial steps along the vertical axis. Note that for the early version of this code, # I used dt = gdept[1:] - gdept[:-1] on NEMO t-grid. Furthermore, you may re-calculate this part on vertical grid cells for future. # logger.info("Calculate barotropic velocities.") ubaro,vbaro=calc_uvbaro(uo,vo,e3t,iu,iv) # # Save 2D fields (here only ubaro & vbaro) # zeros=numpy.zeros(mbathy.shape) #flnm = open(oname+'.txt', 'w') #flnm.write(oname) #flnm.close() ssh = numpy.where(numpy.abs(ssh)>1000,0.,ssh*9.81) # NB: HYCOM srfhgt is in geopotential ... 
    #
    outfile = abf.ABFileArchv("./data/"+oname,"w",iexpt=iexpt,iversn=iversn,yrflag=yrflag,)
    outfile.write_field(zeros, ip,"montg1"  ,0,model_day,1,0)
    outfile.write_field(ssh,   ip,"srfhgt"  ,0,model_day,0,0)
    outfile.write_field(zeros, ip,"surflx"  ,0,model_day,0,0) # Not used
    outfile.write_field(zeros, ip,"salflx"  ,0,model_day,0,0) # Not used
    outfile.write_field(zeros, ip,"bl_dpth" ,0,model_day,0,0) # Not used
    outfile.write_field(zeros, ip,"mix_dpth",0,model_day,0,0) # Not used
    outfile.write_field(ubaro, iu,"u_btrop" ,0,model_day,0,0)
    outfile.write_field(vbaro, iv,"v_btrop" ,0,model_day,0,0)
    #
    if bio_path:
        logger.info("Calculate baroclinic velocities, temperature, and salinity data as well as the BIO fields.")
    else:
        logger.info("Calculate baroclinic velocities, temperature, and salinity data.")
    for k in numpy.arange(u.shape[0]) :
        if bio_path:
            no3k=interpolate2d(blat, blon, no3[k,:,:], points).reshape((jdm,idm))
            no3k = maplev(no3k)
            po4k=interpolate2d(blat, blon, po4[k,:,:], points).reshape((jdm,idm))
            po4k = maplev(po4k)
            si_k=interpolate2d(blat, blon, si[k,:,:], points).reshape((jdm,idm))
            si_k = maplev(si_k)
            if k%10==0 :
                logger.info("Writing 3D variables including BIO, level %d of %d"%(k+1,u.shape[0]))
        else:
            if k%10==0 :
                logger.info("Writing 3D variables, level %d of %d"%(k+1,u.shape[0]))
        #
        # Discard unphysical velocities.
        uo[k,:,:]=numpy.where(numpy.abs(uo[k,:,:])<10,uo[k,:,:],0)
        vo[k,:,:]=numpy.where(numpy.abs(vo[k,:,:])<10,vo[k,:,:],0)

        # Baroclinic velocity (on the HYCOM U- and V-grids)
        ul = p2u_2d(numpy.squeeze(uo[k,:,:])) - ubaro
        vl = p2v_2d(numpy.squeeze(vo[k,:,:])) - vbaro
        ul[iu]=spval
        vl[iv]=spval

        # Layer thickness
        dtl=numpy.zeros(mbathy.shape)
        if thickness_method==1:
            # Use dt for the water column except the cell nearest to the bottom.
            if k < u.shape[0]-1 :
                J,I = numpy.where(mbathy>k)
                e3=(e3t[k,:,:])
                dtl[J,I]=dt[k]
                J,I = numpy.where(mbathy==k)
                dtl[J,I]=e3[J,I]
            else:
                e3=(e3t[k,:,:])
                J,I = numpy.where(mbathy==k)
                dtl[J,I]=e3[J,I]
        else :
            # Use partial cells for the whole water column.
            J,I = numpy.where(mbathy>=k)
            dtl[J,I]=e3t[k,J,I]

        # Salinity
        sl = salt[k,:,:]

        # Temperature
        tl = temp[k,:,:]

        # These need careful treatment to minimize artifacts in the resulting [ab] files.
        if fillgap_method==1:
            J,I= numpy.where(mbathy<k)
            sl = maplev(numpy.where(numpy.abs(sl)<1e2,sl,numpy.nan))
            sl[J,I]=spval
            J,I= numpy.where(mbathy<k)
            tl = maplev(numpy.where(numpy.abs(tl)<1e2,tl,numpy.nan))
            tl[J,I]=spval
        else:
            sl = numpy.where(numpy.abs(sl)<1e2,sl,numpy.nan)
            sl = numpy.minimum(numpy.maximum(maplev(sl),25),80.)
            tl = numpy.where(numpy.abs(tl)<=5e2,tl,numpy.nan)
            tl = numpy.minimum(numpy.maximum(maplev(tl),-5.),50.)

        # Thickness
        dtl = maplev(dtl)
        if k > 0 :
            with numpy.errstate(invalid='ignore'):
                K= numpy.where(dtl < 1e-4)
            sl[K] = sl_above[K]
            tl[K] = tl_above[K]
        #
        sl[ip]=spval
        tl[ip]=spval

        # Save 3D fields
        outfile.write_field(ul      ,iu,"u-vel.",0,model_day,k+1,0)
        outfile.write_field(vl      ,iv,"v-vel.",0,model_day,k+1,0)
        outfile.write_field(dtl*onem,ip,"thknss",0,model_day,k+1,0)
        outfile.write_field(tl      ,ip,"temp"  ,0,model_day,k+1,0)
        outfile.write_field(sl      ,ip,"salin" ,0,model_day,k+1,0)
        if bio_path :
            outfile.write_field(no3k,ip,"ECO_no3",0,model_day,k+1,0)
            outfile.write_field(po4k,ip,"ECO_pho",0,model_day,k+1,0)
            outfile.write_field(si_k,ip,"ECO_sil",0,model_day,k+1,0)

        tl_above=numpy.copy(tl)
        sl_above=numpy.copy(sl)

    outfile.close()
    ncid0.close()
    if timeavg_method==1 and os.path.isfile(fileinput1) :
        ncid1.close()
    if bio_path :
        ncidb.close()
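# --- Hedged sketch (not part of the original script) ---
# The time handling in main() above converts the netCDF time value to hours since
# 1950-01-01 and then builds the HYCOM archive name from an "archv.%Y_%j_%H"
# (or "archv.%Y_%j" + "_00") template. The helper below is a minimal, self-contained
# illustration of that mapping only; the function name and the example offset are
# made up for this sketch and do not appear in the original code.
import datetime


def archv_name_from_hours(hours_since_1950, daily_mean_at_noon=True):
    """Map an 'hours since 1950-01-01 00:00:00' offset to an archv file stem."""
    ref = datetime.datetime(1950, 1, 1, 0, 0, 0)
    stamp = ref + datetime.timedelta(hours=hours_since_1950)
    if daily_mean_at_noon:
        # mirrors the timeavg_method==1 branch: stamp the daily mean at 12:00
        return (stamp + datetime.timedelta(hours=12)).strftime("archv.%Y_%j_%H")
    # otherwise the daily mean is stamped at hour 00
    return stamp.strftime("archv.%Y_%j") + "_00"


# Example with a made-up offset: archv_name_from_hours(24) -> "archv.1950_002_12"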
def is_agent_path(path):
    path = os.path.basename(path)
    return re.match(AGENT_NAME_PATTERN, path) is not None
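# --- Hedged usage sketch (not part of the original module) ---
# is_agent_path() tests only the basename of a path against AGENT_NAME_PATTERN,
# which is defined elsewhere in the original code. The pattern below is a purely
# hypothetical placeholder so the example can run on its own.
import os
import re

AGENT_NAME_PATTERN = r"agent[-_][A-Za-z0-9]+\.py$"  # assumed placeholder, not the real pattern


def is_agent_path_demo(path):
    # same shape as is_agent_path(): strip the directory part, then anchor-match the name
    return re.match(AGENT_NAME_PATTERN, os.path.basename(path)) is not None


# With the placeholder pattern: is_agent_path_demo("/tmp/agent-worker1.py") -> True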
def lookup_by_isbn(number, forceUpdate=False):
    isbn, price = _process_isbn(number)
    print("Looking up isbn", isbn, "with price", price)

    # if the isbn is non-empty and isn't "n/a" or "none"
    if len(isbn) > 0 and not re.match("^n(\s|/){0,1}a|none", isbn, re.I):
        # first we check our database
        titles = Title.select(Title.q.isbn == isbn)
        ##print titles #debug
        known_title = False
        the_titles = list(titles)
        if (len(the_titles) > 0) and (not forceUpdate):
            ##print "in titles"
            known_title = the_titles[0]
            ProductName = the_titles[0].booktitle.format()
            authors = []
            if len(the_titles[0].author) > 0:
                authors = [x.authorName.format() for x in the_titles[0].author]
            authors_as_string = ", ".join(authors)
            categories = []
            if len(the_titles[0].categorys) > 0:
                ##print len(the_titles[0].categorys)
                ##print the_titles[0].categorys
                categories = [x.categoryName.format() for x in the_titles[0].categorys]
            categories_as_string = ", ".join(categories)
            if price == 0:
                if len(the_titles[0].books) > 0:
                    ListPrice = max([x.listprice for x in the_titles[0].books])
                else:
                    ListPrice = 0
            else:
                ListPrice = price
            Manufacturer = the_titles[0].publisher.format()
            Format = the_titles[0].type.format()
            Kind = the_titles[0].kind.kindName
            orig_isbn = the_titles[0].origIsbn.format()
            # if the_titles[0].images:
            #     large_url = the_titles[0].images.largeUrl
            #     med_url = the_titles[0].images.medUrl
            #     small_url = the_titles[0].images.smallUrl
            # else:
            #     large_url = med_url = small_url = ''
            large_url = med_url = small_url = ""

            SpecialOrders = [
                tso.id
                for tso in Title.selectBy(isbn=isbn).throughTo.specialorder_pivots.filter(
                    TitleSpecialOrder.q.orderStatus == "ON ORDER"
                )
            ]
            return {
                "title": ProductName,
                "authors": authors,
                "authors_as_string": authors_as_string,
                "categories_as_string": categories_as_string,
                "list_price": ListPrice,
                "publisher": Manufacturer,
                "isbn": isbn,
                "orig_isbn": orig_isbn,
                "large_url": large_url,
                "med_url": med_url,
                "small_url": small_url,
                "format": Format,
                "kind": Kind,
                "known_title": known_title,
                "special_order_pivots": SpecialOrders,
            }
        else:
            # we don't have it yet
            # if we're using amazon ecs
            if use_amazon_ecs:
                sleep(1)  # so amazon doesn't get huffy
                ecs.setLicenseKey(amazon_license_key)
                ecs.setSecretAccessKey(amazon_secret_key)
                ecs.setAssociateTag(amazon_associate_tag)
                ##print "about to search", isbn, isbn[0]
                amazonBooks = []
                idType = ""
                if len(isbn) == 12:
                    idType = "UPC"
                elif len(isbn) == 13:
                    # if we are using an internal isbn
                    if isbn.startswith(internal_isbn_prefix):
                        return []
                    # otherwise search on amazon.
                    elif isbn.startswith("978") or isbn.startswith("979"):
                        idType = "ISBN"
                    else:
                        idType = "EAN"
                try:
                    print("searching amazon for ", isbn, idType, file=sys.stderr)
                    amazonProds = AmzSear(isbn)
                    print(amazonProds, file=sys.stderr)
                except (ecs.InvalidParameterValue, HTTPError):
                    pass
                if amazonProds:
                    print(amazonProds, file=sys.stderr)
                    # the inner comprehension tests each product for a price whose type is in format_list;
                    # if we find a price whose key is in format_list, we return the corresponding product
                    format_list = [
                        "Paperback",
                        "Mass Market Paperback",
                        "Hardcover",
                        "Perfect Paperback",
                        "Pamphlet",
                        "Plastic Comb",
                        "Spiral-bound",
                        "Print on Demand (Paperback)",
                        "DVD",
                        "Calendar",
                        "Board book",
                        "Audio Cassette",
                        "Cards",
                        "Audio CD",
                        "Diary",
                        "DVD-ROM",
                        "Library Binding",
                        "music",
                        "Vinyl",
                        "Health and Beauty",
                        "Hardback",
                    ]
                    prods = [
                        x
                        for x in amazonProds.values()
                        if [dum for dum in x["prices"].keys() if dum in format_list]
                    ]
                    for prod1 in prods:
                        print(prod1, file=sys.stderr)
                        price_dict = prod1["prices"]
                        listprice = max(price_dict.values())
                        format = [k for k in format_list if k in price_dict]
                        # guard against an empty match before indexing (the original indexed unconditionally)
                        format = format[0] if format else None
                        if not format:
                            continue
                        title = prod1["title"]
                        image_url = prod1["image_url"]
                        authors = [
                            x.replace("by ", "")
                            for x in prod1["subtext"]
                            if x.startswith("by ")
                        ]
                        auth_list = [
                            y.strip()
                            for a in [x.split(", ") for x in authors[0].split(" and ")]
                            for y in a
                        ]
                        # we assume any full name less than five characters is an abbreviation like 'Jr.'
                        # so we add it back to the previous author name
                        abbrev_list = [i for i, x in enumerate(auth_list) if len(x) < 5]
                        for i in abbrev_list:
                            auth_list[i - 1 : i + 1] = [", ".join(auth_list[i - 1 : i + 1])]

                        return {
                            "title": title,
                            "authors": auth_list,
                            "authors_as_string": ",".join(auth_list),
                            "categories_as_string": "",
                            "list_price": listprice,
                            "publisher": "",
                            "isbn": isbn,
                            "orig_isbn": isbn,
                            "large_url": image_url,
                            "med_url": image_url,
                            "small_url": image_url,
                            "format": format,
                            "kind": "books",
                            "known_title": known_title,
                            "special_orders": [],
                        }
                else:
                    traceback.print_exc()
                    print("using isbnlib from ecs", file=sys.stderr)
                    isbnlibbooks = []
                    try:
                        isbnlibbooks = isbnlib.meta(str(isbn))
                    except:
                        pass
                    if isbnlibbooks:
                        return {
                            "title": isbnlibbooks["Title"],
                            "authors": isbnlibbooks["Authors"],
                            "authors_as_string": ",".join(isbnlibbooks["Authors"]),
                            "categories_as_string": None,
                            "list_price": price,
                            "publisher": isbnlibbooks["Publisher"],
                            "isbn": isbn,
                            "orig_isbn": isbn,
                            "large_url": None,
                            "med_url": None,
                            "small_url": None,
                            "format": None,
                            "kind": "books",
                            "known_title": known_title,
                            "special_orders": [],
                        }
                    else:
                        return {}
            else:
                # if we're scraping amazon
                print("scraping amazon", file=sys.stderr)
                headers = {"User-Agent": random.sample(user_agents, 1).pop()}
                amazon_url_template = "http://www.amazon.com/dp/%s/"
                if len(isbn) == 13:
                    isbn10 = None
                    if isbnlib.is_isbn13(isbn):
                        isbn10 = isbnlib.to_isbn10(isbn)
                    else:
                        return {}
                if isbn10:
                    with requests.Session() as session:
                        try:
                            print("getting amazon")
                            page_response = session.get(
                                amazon_url_template % isbn10, headers=headers, timeout=0.1
                            )
                            print("got response")
                            page_content = BeautifulSoup(page_response.content, "lxml")
                            print("got parsed content")
                            try:
                                booktitle = page_content.select("#productTitle").pop().text
                            except Exception as e:
                                traceback.print_exc()
                                booktitle = ''
                            popover_preload = [
                                a.text
                                for a in page_content.select(
                                    ".author.notFaded .a-popover-preload a.a-link-normal"
                                )
                            ]
                            author_name = [
                                a.text
                                for a in page_content.select(
                                    ".author.notFaded a.a-link-normal"
                                )
                                if a.text not in popover_preload
                            ]
                            contributor_role = page_content.select(".contribution span")
                            try:
                                contributor_role = [
                                    re.findall("\w+", cr.text).pop()
                                    for cr in contributor_role
                                ]
                            except Exception as e:
                                traceback.print_exc()
                                contributor_role = []
                            author_role = zip(author_name, contributor_role)
                            try:
                                listprice = page_content.select(".a-text-strike").pop().text
                            except IndexError as e:
                                print("using bookfinder4u")
                                if "listprice" not in locals():
                                    with requests.Session() as session:
                                        bookfinderurl = "http://www.bookfinder4u.com/IsbnSearch.aspx?isbn='%s'&mode=direct"
                                        url = bookfinderurl % isbn
                                        try:
                                            page_response2 = session.get(
                                                url, headers=headers, timeout=0.1
                                            )
                                            page_content2 = BeautifulSoup(
                                                page_response2.content, "lxml"
                                            )
                                        except Exception as e:
                                            traceback.print_exc()
                                            listprice = 0.0
                                        else:
                                            try:
                                                matches = re.search(
                                                    "List\sprice:\s(\w{2,4})\s(\d+(.\d+)?)",
                                                    page_content2.text,
                                                    re.I,
                                                )
                                                if matches:
                                                    listprice = matches.groups()[1]
                                                else:
                                                    listprice = 0.00
                                            except Exception as e:
                                                traceback.print_exc()
                                                listprice = 0.00
                            try:
                                book_edition = page_content.select("#bookEdition").pop().text
                            except Exception as e:
                                traceback.print_exc()
                                book_edition = ""
                            try:
                                matches = re.findall(
                                    "(?<=imageGalleryData'\s:\s\[)\{.*?\}",
                                    page_content.contents[1].text,
                                )
                                image_url_dict = eval(matches[0])
                            except Exception as e:
                                traceback.print_exc()
                                image_url_dict = {"mainUrl": "", "thumbUrl": ""}
                            category_items = page_content.select(".zg_hrsr_ladder a")
                            category_items = [a.text for a in category_items]
                            product_details = page_content.select(
                                "#productDetailsTable"
                            )  # ul:first-of-type")
                            try:
                                product_details1 = product_details.pop().text.splitlines()
                                quit_flag = 0
                                for pd in product_details1:
                                    if pd.endswith("pages"):
                                        format, numpages = pd.split(":")
                                        numpages = numpages.replace(" pages", "").strip()
                                        quit_flag += 1
                                        continue
                                    if pd.startswith("Publisher: "):
                                        matches = re.match(
                                            "Publisher: ([^;^(]*)\s?([^(]*)?\W(.*)\W", pd
                                        ).groups()
                                        publisher = matches[0]
                                        publication_date = matches[2]
                                        quit_flag += 1
                                        continue
                                    if quit_flag == 2:
                                        break
                                else:
                                    publisher = ''
                                    format = ''
                            except Exception as e:
                                traceback.print_exc()
                                publisher = ''
                                format = ''
                            if booktitle:
                                return {
                                    "title": booktitle,
                                    "authors": author_name,
                                    "authors_as_string": ",".join(author_name),
                                    "categories_as_string": ",".join(category_items),
                                    "list_price": listprice,
                                    "publisher": publisher,
                                    "isbn": isbn,
                                    "orig_isbn": isbn,
                                    "large_url": image_url_dict["mainUrl"],
                                    "med_url": image_url_dict["mainUrl"],
                                    "small_url": image_url_dict["thumbUrl"],
                                    "format": format,
                                    "kind": "books",
                                    "known_title": known_title,
                                    "special_orders": [],
                                }
                        except Exception as e:
                            traceback.print_exc()
                            print("using isbnlib from scraper", file=sys.stderr)
                            isbnlibbooks = []
                            try:
                                isbnlibbooks = isbnlib.meta(str(isbn))
                            except:
                                pass
                            if isbnlibbooks:
                                return {
                                    "title": isbnlibbooks["Title"],
                                    "authors": isbnlibbooks["Authors"],
                                    "authors_as_string": ",".join(isbnlibbooks["Authors"]),
                                    "categories_as_string": None,
                                    "list_price": price,
                                    "publisher": isbnlibbooks["Publisher"],
                                    "isbn": isbn,
                                    "orig_isbn": isbn,
                                    "large_url": None,
                                    "med_url": None,
                                    "small_url": None,
                                    "format": None,
                                    "kind": "books",
                                    "known_title": known_title,
                                    "special_orders": [],
                                }
                            else:
                                return {}
                else:
                    if title:
                        return {
                            "title": title,
                            "authors": author_name,
                            "authors_as_string": ",".join(author_name),
                            "categories_as_string": ",".join(category_items),
                            "list_price": listprice,
                            "publisher": publisher,
                            "isbn": isbn,
                            "orig_isbn": isbn,
                            "large_url": image_url_dict["mainUrl"],
                            "med_url": image_url_dict["mainUrl"],
                            "small_url": image_url_dict["thumbUrl"],
                            "format": format,
                            "kind": "books",
                            "known_title": known_title,
                            "special_orders": [],
                        }
                    else:
                        return {}
    else:
        return {}
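# --- Hedged sketch (not part of the original function) ---
# lookup_by_isbn() picks an Amazon id type from the bare number before searching:
# 12 digits are treated as "UPC", 13-digit codes starting with 978/979 as "ISBN",
# and (under the branch nesting reconstructed above, which the flattened original
# leaves ambiguous) other 13-digit codes as "EAN"; internal numbers are never sent
# out. The standalone helper and the prefix value below are made up for this
# sketch; the real prefix comes from the internal_isbn_prefix setting.
INTERNAL_ISBN_PREFIX = "200"  # hypothetical stand-in for internal_isbn_prefix


def classify_identifier(number):
    """Return the id type lookup_by_isbn() would search with, or None for internal numbers."""
    if len(number) == 12:
        return "UPC"
    if len(number) == 13:
        if number.startswith(INTERNAL_ISBN_PREFIX):
            return None  # internal ISBNs short-circuit the external lookup
        if number.startswith(("978", "979")):
            return "ISBN"
        return "EAN"
    return ""  # lookup_by_isbn() leaves idType empty for other lengths


# Example: classify_identifier("9781234567897") -> "ISBN"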