Example #1
import re
import sys

def process_line(line, extra_tags):
  line = re.sub(' *#.*$', '', line)  # remove trailing comments
  line = re.sub('-$', '', line)      # remove a trailing hyphen

  if ' ' not in line or re.match('.*[а-яіїєґ]/.*', line):
    # single word, or a Cyrillic stem with a slash form: pass through as-is
    out_line = line
  elif re.match('^[^ ]+ [^ ]+ [^:]?[a-z].*$', line):
    # word, lemma and tags are already present: pass through as-is
    out_line = line
  elif re.match('^[^ ]+ [:^<a-z0-9_].*$', line):
    # word followed directly by tags: repeat the word as its own lemma
    out_line = re.sub('^([^ ]+) ([^<a-z].*)$', '\\1 \\1 \\2', line)
  else:
    # fallback: tag every word on the line with the first word as lemma
    print('hit-', line, file=sys.stderr)
    base = re.findall('^[^ ]+', line)[0]
    out_line = re.sub('([^ ]+) ?', '\\1 ' + base + ' unknown' + extra_tags + '\n', line)
    return out_line[:-1]

  if extra_tags != '' and (' ' not in out_line or ' ^' in out_line):
    extra_tags = ' ' + extra_tags
    
  if '|' in out_line:
    out_line = out_line.replace('|', extra_tags + '|')

  return out_line + extra_tags
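
A minimal sketch of the fallback branch above, run on a made-up untagged line (extra_tags assumed empty):

import re

line = 'foo bar baz'
base = re.findall('^[^ ]+', line)[0]  # 'foo'
out = re.sub('([^ ]+) ?', '\\1 ' + base + ' unknown' + '\n', line)
print(out[:-1])
# foo foo unknown
# bar foo unknown
# baz foo unknown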
Example #2
    def __init__(self, host, debugfunc=None):
        if isinstance(host, tuple):
            host, self.weight = host
        else:
            self.weight = 1

        #  parse the connection string
        m = re.match(r'^(?P<proto>unix):(?P<path>.*)$', host)
        if not m:
            m = re.match(r'^(?P<proto>inet):'
                    r'(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host)
        if not m:
            m = re.match(r'^(?P<host>[^:]+):(?P<port>[0-9]+)$', host)
        if not m:
            raise ValueError('Unable to parse connection string: "%s"' % host)

        hostData = m.groupdict()
        if hostData.get('proto') == 'unix':
            self.family = socket.AF_UNIX
            self.address = hostData['path']
        else:
            self.family = socket.AF_INET
            self.ip = hostData['host']
            self.port = int(hostData.get('port') or 11211)  # groupdict() yields None when the port is absent
            self.address = (self.ip, self.port)

        if not debugfunc:
            debugfunc = lambda x: x
        self.debuglog = debugfunc

        self.deaduntil = 0
        self.socket = None

        self.buffer = ''
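
A hedged sketch of the three connection-string patterns above, on made-up hosts (not part of the original class):

import re

hosts = ['unix:/tmp/memcached.sock', 'inet:example.com:11211', 'example.com:11211']
for host in hosts:
    m = re.match(r'^(?P<proto>unix):(?P<path>.*)$', host)
    if not m:
        m = re.match(r'^(?P<proto>inet):(?P<host>[^:]+)(:(?P<port>[0-9]+))?$', host)
    if not m:
        m = re.match(r'^(?P<host>[^:]+):(?P<port>[0-9]+)$', host)
    # note: for 'inet:example.com' (no port) groupdict() would hold 'port': None,
    # which is why the constructor uses int(... or 11211)
    print(host, '->', m.groupdict())
# unix:/tmp/memcached.sock -> {'proto': 'unix', 'path': '/tmp/memcached.sock'}
# inet:example.com:11211 -> {'proto': 'inet', 'host': 'example.com', 'port': '11211'}
# example.com:11211 -> {'host': 'example.com', 'port': '11211'}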
Example #3
    def importAuto(cls, string, path=None, activeFit=None, callback=None, encoding=None):
        # Get first line and strip space symbols of it to avoid possible detection errors
        firstLine = re.split("[\n\r]+", string.strip(), maxsplit=1)[0]
        firstLine = firstLine.strip()

        # If XML-style start of tag encountered, detect as XML
        if re.match("<", firstLine):
            if encoding:
                return "XML", cls.importXml(string, callback, encoding)
            else:
                return "XML", cls.importXml(string, callback)

        # If JSON-style start, parse as CREST/JSON
        if firstLine.startswith('{'):
            return "JSON", (cls.importCrest(string),)

        # If we've got source file name which is used to describe ship name
        # and first line contains something like [setup name], detect as eft config file
        if re.match("\[.*\]", firstLine) and path is not None:
            filename = os.path.split(path)[1]
            shipName = filename.rsplit('.')[0]
            return "EFT Config", cls.importEftCfg(shipName, string, callback)

        # If no file is specified and there's comma between brackets,
        # consider that we have [ship, setup name] and detect like eft export format
        if re.match("\[.*,.*\]", firstLine):
            return "EFT", (cls.importEft(string),)

        # Use DNA format for all other cases
        return "DNA", (cls.importDna(string),)
Example #4
    def __init__(self, filename):
        self.name = "YNAB"
        self.transactions = []

        with open(filename) as register:
            dr = csv.DictReader(register)
            for row in dr:
                trans = self._process_row(row)
                while True:  # Merge split transactions into a single transaction
                    regex = r'\(Split ([0-9]+)/([0-9]+)\)'
                    match = re.match(regex, row["Memo"])
                    if not match:
                        break

                    for split_row in dr:
                        match = re.match(regex, split_row["Memo"])
                        t = self._process_row(split_row)
                        trans.amount += t.amount

                        current_split = match.group(1)
                        max_splits = match.group(2)
                        if current_split == max_splits:
                            break
                    break

                trans.amount = round(trans.amount, 2)  # round away the floating-point drift that accumulates when summing the amounts imported from Mint
                self.transactions.append(trans)

        self.transactions.sort()
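
A quick hedged check of the split-marker pattern above; the memo text is made up:

import re

regex = r'\(Split ([0-9]+)/([0-9]+)\)'
m = re.match(regex, '(Split 2/3) Groceries')
print(m.group(1), m.group(2))  # -> 2 3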
Example #5
  def _apache_index(self, url):
    r = requests.get(url)
    if r.status_code != 200:
      raise ValueError(url+" status:"+str(r.status_code))
    r.dirs = []
    r.files = []
    for l in r.text.split("\n"):
      # '<img src="/icons/folder.png" alt="[DIR]" /> <a href="7.0/">7.0/</a>       03-Dec-2014 19:57    -   '
      # '<img src="/icons/tgz.png" alt="[   ]" /> <a href="owncloud_7.0.4-2.diff.gz">owncloud_7.0.4-2.diff.gz</a>                     09-Dec-2014 16:53  9.7K   <a href="owncloud_7.0.4-2.diff.gz.mirrorlist">Details</a>'
      m = re.search(r"<a\s+href=[\"']?([^>]+?)[\"']?>([^<]+?)[\"']?</a>\s*([^<]*)", l, re.I)
      if m:
        # e.g. ('owncloud_7.0.4-2.diff.gz', 'owncloud_7.0.4-2.diff.gz', '09-Dec-2014 16:53  9.7K   ')
        m1, m2, m3 = m.groups()

        if re.match(r"(/|\?|\w+://)", m1):  # skip absolute urls, query strings and foreign urls
          continue
        if re.match(r"\.?\./?$", m1):  # skip . and ..
          continue

        m3 = re.sub(r"[\s-]+$", "", m3)
        if m1.endswith("/"):
          r.dirs.append([m1, m3])
        else:
          r.files.append([m1, m3])
    return r
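
A hedged check of the anchor-extraction pattern above against the first sample row quoted in the comments:

import re

l = '<img src="/icons/folder.png" alt="[DIR]" /> <a href="7.0/">7.0/</a>       03-Dec-2014 19:57    -   '
m = re.search(r"<a\s+href=[\"']?([^>]+?)[\"']?>([^<]+?)[\"']?</a>\s*([^<]*)", l, re.I)
m1, m2, m3 = m.groups()
print(m1, '|', m2, '|', re.sub(r"[\s-]+$", "", m3))
# 7.0/ | 7.0/ | 03-Dec-2014 19:57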
Example #6
def area_code_lookup(request, area_id, format):
    from mapit.models import Area, CodeType
    area_code = None
    if re.match(r'\d\d([A-Z]{2}|[A-Z]{4}|[A-Z]{2}\d\d\d|[A-Z]|[A-Z]\d\d)$', area_id):
        area_code = CodeType.objects.get(code='ons')
    elif re.match(r'[EW]0[12]\d{6}$', area_id):  # LSOA/MSOA have ONS code type
        area_code = CodeType.objects.get(code='ons')
    elif re.match(r'[ENSW]\d{8}$', area_id):
        area_code = CodeType.objects.get(code='gss')
    if not area_code:
        return None

    args = {'format': format, 'codes__type': area_code, 'codes__code': area_id}
    if re.match('[EW]01', area_id):
        args['type__code'] = 'OLF'
    elif re.match('[EW]02', area_id):
        args['type__code'] = 'OMF'

    area = get_object_or_404(Area, **args)
    path = '/area/%d%s' % (area.id, '.%s' % format if format else '')
    # If there was a query string, make sure it's passed on in the
    # redirect:
    if request.META['QUERY_STRING']:
        path += "?" + request.META['QUERY_STRING']
    return HttpResponseRedirect(path)
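
A hedged sketch of the ID-shape dispatch above; the IDs are made up but follow the documented shapes:

import re

for area_id in ['00AA', 'E01000001', 'S12000033']:
    if re.match(r'\d\d([A-Z]{2}|[A-Z]{4}|[A-Z]{2}\d\d\d|[A-Z]|[A-Z]\d\d)$', area_id):
        code_type = 'ons'
    elif re.match(r'[EW]0[12]\d{6}$', area_id):
        code_type = 'ons'  # LSOA/MSOA
    elif re.match(r'[ENSW]\d{8}$', area_id):
        code_type = 'gss'
    else:
        code_type = None
    print(area_id, '->', code_type)
# 00AA -> ons
# E01000001 -> ons
# S12000033 -> gss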
Example #7
def checkInCNAME(node_text, nodes):
	try:
		InCNAME = re.search("IN CNAME (.*)", node_text)
		alias = InCNAME.group(1)
		#IP address found
		if re.match("(\d{1,3}\.)", alias):
			return alias
		# cname is a subdomain
		elif re.match(".*[a-x]\.", alias):
			return ("subdomain found (" + alias + ")")
		#cname is another cname
		else:
			try:
				alias_name = dns.name.Name([alias])
				alias_IP = nodes[alias_name].to_text(alias_name)
				checkCname = checkInA(alias_IP)
				if checkCname is None:
					return checkInCNAME(alias_IP, nodes)
				else:
					return checkCname
			except Exception:
				return (Fore.RED + "unknown host (" + alias + ")" + Fore.RESET)
	# node has no IN CNAME
	except AttributeError:
		return None
Example #8
import re
import sys


def parse_requirements(requirements_file='requirements.txt'):
    requirements = []
    with open(requirements_file, 'r') as f:
        for line in f:
            # For the requirements list, we need to inject only the portion
            # after egg= so that distutils knows the package it's looking for
            # such as:
            # -e git://github.com/openstack/nova/master#egg=nova
            if re.match(r'\s*-e\s+', line):
                requirements.append(re.sub(r'\s*-e\s+.*#egg=(.*)$', r'\1',
                                    line))
            # such as:
            # http://github.com/openstack/nova/zipball/master#egg=nova
            elif re.match(r'\s*https?:', line):
                requirements.append(re.sub(r'\s*https?:.*#egg=(.*)$', r'\1',
                                    line))
            # -f lines are for index locations, and don't get used here
            elif re.match(r'\s*-f\s+', line):
                pass
            # -r lines are for including other files, and don't get used here
            elif re.match(r'\s*-r\s+', line):
                pass
            # argparse is part of the standard library starting with 2.7
            # adding it to the requirements list screws distro installs
            elif line == 'argparse' and sys.version_info >= (2, 7):
                pass
            else:
                requirements.append(line.strip())
    return requirements
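
A hedged check of the two egg-name extractions above, using the sample lines from the comments:

import re

line = '-e git://github.com/openstack/nova/master#egg=nova'
print(re.sub(r'\s*-e\s+.*#egg=(.*)$', r'\1', line))    # -> nova
line = 'http://github.com/openstack/nova/zipball/master#egg=nova'
print(re.sub(r'\s*https?:.*#egg=(.*)$', r'\1', line))  # -> nova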
Example #9
def parse_report(path):
    """ Return the volume information contained in the SIENAX report, as a
        dictionary with keys "grey", "white", and "brain". The entry for each
        tissue is a dictionary with the normalized and raw values, in cubic
        millimeters.

        adapted from: http://code.google.com/p/medipy/source/browse/plugins/fsl/sienax.py
        see licence: http://code.google.com/p/medipy/source/browse/LICENSE
    """

    report = {}

    with open(path) as fd:
        for line in fd:
            for tissue in ["GREY", "WHITE", "BRAIN"]:
                pattern = tissue + r"\s+([\d+\.]+)\s+([\d+\.]+)"
                measure = re.match(pattern, line)
                if measure:
                    normalized = float(measure.group(1))
                    raw = float(measure.group(2))
                    report[tissue.lower()] = {"normalized": normalized, "raw": raw}
                    continue

            vscale = re.match(r"VSCALING ([\d\.]+)", line)
            if vscale:
                report["vscale"] = float(vscale.group(1))

    return report
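
A hedged check of the tissue pattern above on a made-up SIENAX report line:

import re

line = 'GREY    712569.09    574412.92'
m = re.match(r"GREY\s+([\d+\.]+)\s+([\d+\.]+)", line)
print(float(m.group(1)), float(m.group(2)))  # -> 712569.09 574412.92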
Example #10
def check_api_version_decorator(logical_line, previous_logical, blank_before,
                                filename):
    msg = ("N332: the api_version decorator must be the first decorator"
           " on a method.")
    if blank_before == 0 and re.match(api_version_re, logical_line) \
           and re.match(decorator_re, previous_logical):
        yield (0, msg)
Example #11
def parse_template(template_name):
    """Given a template name, attempt to extract its group name and upload date

    Returns:
        * None if no groups matched
        * group_name, datestamp of the first matching group. group name will be a string,
          datestamp will be a :py:class:`datetime.date <python:datetime.date>`, or None if
          a date can't be derived from the template name
    """
    for group_name, regex in stream_matchers:
        matches = re.match(regex, template_name)
        if matches:
            groups = matches.groupdict()
            # hilarity may ensue if this code is run right before the new year
            today = date.today()
            year = int(groups.get('year', today.year))
            month, day = int(groups['month']), int(groups['day'])
            # validate the template date by turning into a date obj
            template_date = futurecheck(date(year, month, day))
            return TemplateInfo(group_name, template_date, True)
    for group_name, regex in generic_matchers:
        matches = re.match(regex, template_name)
        if matches:
            return TemplateInfo(group_name, None, False)
    # If no match, unknown
    return TemplateInfo('unknown', None, False)
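
A hedged sketch only: stream_matchers is defined elsewhere, so this assumes a hypothetical entry with the named groups the code reads:

import re
from datetime import date

regex = r'(?P<group>[a-z]+)-(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})'
m = re.match(regex, 'upstream-2016-03-01')
groups = m.groupdict()
print(groups['group'], date(int(groups['year']), int(groups['month']), int(groups['day'])))
# -> upstream 2016-03-01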
Example #12
def parse(fh):
    stats = []
    for line in fh:
        m = re.match(r'TRANSLATION\s+(?P<content>.*)\n', line)
        if not m:
            continue

        line = m.group('content')
        m = re.match(r'(?P<group>[a-zA-Z_@.]+):', line)
        if not m:
            sys.stderr.write('Malformed TRANSLATION line: %s\n' % line)
            continue

        stat = {'group': m.group('group')}

        if stat['group'] == 'total':
            continue
        else:
            total = 0
            for x in stat_types:
                m = re.search(r'\b(?P<count>\d+) %s (message|translation)' % x,
                              line)
                if m:
                    stat[x] = int(m.group('count'))
                    total += stat[x]
            stat['total'] = total
        stats.append(stat)

    return stats
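
A hedged sketch: stat_types is defined elsewhere, so this assumes it holds labels such as 'translated'; the sample line is made up:

import re

line = 'TRANSLATION anaconda: 10 translated messages, 2 fuzzy translations.\n'
content = re.match(r'TRANSLATION\s+(?P<content>.*)\n', line).group('content')
print(re.match(r'(?P<group>[a-zA-Z_@.]+):', content).group('group'))  # -> anaconda
m = re.search(r'\b(?P<count>\d+) %s (message|translation)' % 'translated', content)
print(m.group('count'))  # -> 10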
Example #13
def readFile(fileV4, fileV6, trie):

	# open ipv4 file
	infile = open(fileV4, "r")
	pattern = r'(\d+),(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/(\d{1,2}).*'

	for line in infile:
		result = re.match(pattern, line)
		if result:
			address = result.group(2)
			length = result.group(3)
			asn = result.group(1)
			update = True
			withdrawal = False
			count = 0
			insertTrie(trie, address, length, asn, update, withdrawal, count)

	# open ipv6 file
	infile = open(fileV6, "r")
	pattern = r'(\d+),(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]).){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9]))/(\d{1,3}),.*'

	for line in infile:
		result = re.match(pattern, line)
		if result:
			address = result.group(2)
			length = result.group(32)  # group 32 is the final (\d{1,3}) prefix-length group
			asn = result.group(1)
			update = True
			withdrawal = False
			count = 0

			insertTrie(trie, address, length, asn, update, withdrawal, count)
	
	return trie
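
A hedged check of the IPv4 pattern above on a made-up "asn,prefix" line:

import re

pattern = r'(\d+),(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/(\d{1,2}).*'
result = re.match(pattern, '64512,10.0.0.0/8,...')
print(result.group(1), result.group(2), result.group(3))  # -> 64512 10.0.0.0 8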
Example #14
    def history(self, page):
        GIT_COMMIT_FIELDS = ["commit", "author", "date", "date_relative", "message"]
        GIT_LOG_FORMAT = "%x1f".join(["%h", "%an", "%ad", "%ar", "%s"]) + "%x1e"
        output = git.log("--format=%s" % GIT_LOG_FORMAT, "--follow", "-z", "--shortstat", page.abspath)
        output = output.split("\n")
        history = []
        for line in output:
            if "\x1f" in line:
                log = line.strip("\x1e\x00").split("\x1f")
                history.append(dict(zip(GIT_COMMIT_FIELDS, log)))
            else:
                insertion = re.match(r".* (\d+) insertion", line)
                deletion = re.match(r".* (\d+) deletion", line)
                history[-1]["insertion"] = int(insertion.group(1)) if insertion else 0
                history[-1]["deletion"] = int(deletion.group(1)) if deletion else 0

        max_changes = float(max([(v["insertion"] + v["deletion"]) for v in history])) or 1.0
        for v in history:
            v.update(
                {
                    "insertion_relative": str((v["insertion"] / max_changes) * 100),
                    "deletion_relative": str((v["deletion"] / max_changes) * 100),
                }
            )
        return history
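
A hedged check of the --shortstat patterns above on a typical, made-up line:

import re

line = ' 3 files changed, 10 insertions(+), 2 deletions(-)'
insertion = re.match(r'.* (\d+) insertion', line)
deletion = re.match(r'.* (\d+) deletion', line)
print(insertion.group(1), deletion.group(1))  # -> 10 2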
Example #15
def filter_services(svcs):
    filtered = []

    # filter includes
    if _args['--has']:
        for sv in svcs:
            for inc in _args['--has']:
                if inc in sv["tags"] and sv not in filtered:
                    filtered.append(sv)

    if _args['--match']:
        for sv in svcs:
            for regex in _args['--match']:
                for tag in sv["tags"]:
                    if re.match(regex, tag) and sv not in filtered:
                        filtered.append(sv)

    if not filtered and not _args['--has'] and not _args['--match']:
        filtered = svcs

    if _args['--has-not']:
        for sv in list(filtered):  # operate on a copy, otherwise .remove would change the list under our feet
            for exc in _args['--has-not']:
                if exc in sv["tags"]:
                    filtered.remove(sv)

    if _args['--no-match']:
        for sv in list(filtered):
            for regex in _args['--no-match']:
                for tag in sv["tags"]:
                    if re.match(regex, tag) and sv in list(filtered):
                        filtered.remove(sv)

    return filtered
Example #16
import os
import re


def main():
	f = open("makefile2wrappers.txt", "r")
	lins = f.readlines()
	f.close()

	for l in lins:
		l = l.strip()
		if len(l) == 0:
			continue

		print('Line: ' + l)
		# e.g.: $(C) -DDINT -c ../Source/umf_analyze.c -o umf_i_analyze.o
		defs = re.match(r".*\)(.*)-c", l).group(1).strip()
		if re.search('.*-o.*', l) is not None:
			# "-o" flag present: wrap the named output file
			src = re.match(r".*-c(.*)-o", l).group(1).strip()
			out = re.match(r".*-o(.*)", l).group(1).strip()
			f = 'SourceWrappers/' + out + ".c"
			print(' => Creating ' + f + '\n')
			o = open(f, "w")
			DEFs = defs.strip().split("-D")
			DEFs = [x for x in DEFs if x]  # drop empty entries
			for d in DEFs:
				o.write('#define ' + d + '\n')
			o.write('#include <' + src + '>' + '\n')
			o.close()
		else:
			# no "-o" flag: wrap the source file as-is
			src = re.match(r".*-c(.*)", l).group(1).strip()
			f = "SourceWrappers/" + os.path.basename(src)
			print(' => Creating ' + f + '\n')
			o = open(f, "w")
			o.write('#include <' + src + '>' + '\n')
			o.close()

	return 0
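
A hedged check of the three extractions above, using the sample line from the comment:

import re

l = '$(C) -DDINT -c ../Source/umf_analyze.c -o umf_i_analyze.o'
print(re.match(r'.*\)(.*)-c', l).group(1).strip())  # -> -DDINT
print(re.match(r'.*-c(.*)-o', l).group(1).strip())  # -> ../Source/umf_analyze.c
print(re.match(r'.*-o(.*)', l).group(1).strip())    # -> umf_i_analyze.o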
Example #17
    def process_isolation_file(self, sql_file, output_file):
        """
            Processes the given sql file and writes the output
            to output file
        """
        try:
            command = ""
            for line in sql_file:
                tinctest.logger.info("re.match: %s" % re.match(r"^\d+[q\\<]:$", line))
                print >> output_file, line.strip(),
                (command_part, dummy, comment) = line.partition("--")
                if command_part == "" or command_part == "\n":
                    print >> output_file
                elif command_part.endswith(";\n") or re.match(r"^\d+[q\\<]:$", line):
                    command += command_part
                    tinctest.logger.info("Processing command: %s" % command)
                    self.process_command(command, output_file)
                    command = ""
                else:
                    command += command_part

            for process in self.processes.values():
                process.stop()
        except:
            for process in self.processes.values():
                process.terminate()
            raise
        finally:
            for process in self.processes.values():
                process.terminate()
Example #18
    def test_various_ops(self):
        # This takes about n/3 seconds to run (about n/3 clumps of tasks,
        # times about 1 second per clump).
        NUMTASKS = 10

        # no more than 3 of the 10 can run at once
        sema = threading.BoundedSemaphore(value=3)
        mutex = threading.RLock()
        numrunning = Counter()

        threads = []

        for i in range(NUMTASKS):
            t = TestThread("<thread %d>" % i, self, sema, mutex, numrunning)
            threads.append(t)
            self.assertEqual(t.ident, None)
            self.assertTrue(re.match("<TestThread\(.*, initial\)>", repr(t)))
            t.start()

        if verbose:
            print("waiting for all tasks to complete")
        for t in threads:
            t.join(NUMTASKS)
            self.assertTrue(not t.is_alive())
            self.assertNotEqual(t.ident, 0)
            self.assertFalse(t.ident is None)
            self.assertTrue(re.match("<TestThread\(.*, stopped -?\d+\)>", repr(t)))
        if verbose:
            print("all tasks done")
        self.assertEqual(numrunning.get(), 0)
Example #19
import re


def main():
    f = open('4_dataset.txt', 'r')
    x = f.readlines()
    f.close()

    for line in x:
        ma = re.match(r'a={(.*)}', line)
        mb = re.match(r'b={(.*)}', line)
        if ma:
            a = ma.group(1).split(',')
        elif mb:
            b = mb.group(1).split(',')
    
    f00 = f01 = f10 = f11 = 0
    print 'a =', [ int(i) for i in a ]
    print 'b =', [ int(i) for i in b ]
    
    for i in zip(a, b):
        if i == ('0', '0'):
            f00 += 1
        if i == ('0', '1'):
            f01 += 1
        if i == ('1', '0'):
            f10 += 1
        if i == ('1', '1'):
            f11 += 1
    
    print 'Similarity Coeff =', float(f00 + f11)/(f00 + f01 + f10 + f11)
    print 'Jaccard Coeff =', f11/float(f01 + f10 + f11)
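
A hedged check of the set-extraction pattern above; the sample line is made up:

import re

m = re.match(r'a={(.*)}', 'a={1,0,1,1}')
print([int(i) for i in m.group(1).split(',')])  # -> [1, 0, 1, 1]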
Example #20
def tourAllFiles(dirpath):
    global a
    global alen
    global domain
    global person
    
    # names = list of files in current path
    names = os.listdir(dirpath)
    
    # find 'si' and 'sx' prefix and 'phn' suffix
    # filter out 'sa' prefix
    pat1 = r'.*si.*\.phn'
    pat2 = r'.*sx.*\.phn'
    drpat = r'dr\d'
    for name in names:
        if re.match(pat1, name) is not None or re.match(pat2, name) is not None:
            phn2label(name)
        
        curpath = dirpath+'/'+name
        if os.path.isdir(curpath):
            # only use to drx/person/xxx.phn
            if re.match(drpat,name):
                domain = name
            else:
                person = name
            # iterate
            os.chdir(curpath)
            tourAllFiles(curpath)
            os.chdir(dirpath)
Example #21
    def processAux(self, dFrag):
        self.depth = self.depth + 1
        if self.depth not in self.files:
            self.files[self.depth] = []
        thisDir=self.compoundDir(self.topDir, dFrag)
        os.chdir(thisDir)
        self.theDict[thisDir]={'xml': [], 'bin': [], 'dir': []}
        # print "Processing",thisDir," Depth",self.depth
        thisDirContents=os.listdir(thisDir)
        for fname in thisDirContents:
            if stat.S_ISDIR(os.stat(fname)[stat.ST_MODE]):
                if not re.match(r"^(CVS|images|search|photos|htdig|\.)", fname) and self.depth < 4:
                    self.processAux(self.compoundDir(dFrag,fname))
                    self.handleDir(thisDir, fname)
                    os.chdir(thisDir)
            else:
                # print "File",fname
                if re.match(".*\.xml$", fname):
                    self.handleXML(thisDir, dFrag, fname)
                elif re.match(".*\.(jpe?g|JPG|gif|png|html)$",
                              fname):
                    self.handleBinary(thisDir, fname)

        self.writeIndex(dFrag)
        self.depth = self.depth - 1
Example #22
    def parse(self, response):
        sel = Selector(response)
        result = []
       
        ad = DatesItem()
        ad['name'] = ""
        for p in sel.xpath("//div[@class='poziomd']//text()").extract():

            if re.match("^.*,", p):
                if p.startswith(","):
                    ad['desc'] = p[2:]
                else:
                    ad['desc'] = p[6:]
                ad['name'] = ad['name'].lstrip('1234567890() ').strip()
                if re.match(r'^.\s', ad['name']):
                    ad['name'] = ad['name'][2:]

                ad['url'] = response.url
                if re.match(".*urodzeni.*", response.url):
                    ad['isBirth'] = True
                else:
                    ad['isBirth'] = False

                result.append(ad)
                ad = DatesItem()
                ad['name'] = ""
            elif re.match("^\s*[0-9]{1,4}", p) and not ad.has_key('date'):
                ad['date'] = re.match("^\s*[0-9]{1,4}", p).group()
            else:
                ad['name'] = ad['name'] + p
        return result
Example #23
def register(request) :
  '''
  Handle a Post request with the following information:
  login, password, email
  '''
  print 'receiving a request'
  #parameter retrieval
  try :
    login = request.GET['registerLogin']
    password = request.GET['registerPassword']
    email = request.GET['registerEmail']
  except MultiValueDictKeyError :
    response=HttpResponse('400 - BAD URI')
    response.status_code=400
    return response
  
  #parameter validation
  loginIsValid = re.match(r'\w+$', login) and 3 < len(login) < 16
  passwordIsValid = len(password) >= 6
  #TODO check with number
  emailIsValid = re.match(r'[\w.]+@\w+\.[\w.]+$', email)
  
  logger.info(login + ' ' + password + ' ' + email)
  
  if loginIsValid and passwordIsValid and emailIsValid :
    return processFormInformation(login, password, email, request)
  else :
    response=HttpResponse("400")
    response['message'] = 'invalid information'
    response.status_code=400
    return response
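
A hedged illustration of why the anchored login check matters; the sample strings are made up:

import re

print(bool(re.match(r'[\w0-9]*', 'bad login!')))  # True: the unanchored pattern matches the empty prefix
print(bool(re.match(r'\w+$', 'bad login!')))      # False: the whole string must be word characters
print(bool(re.match(r'\w+$', 'good_login')))      # True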
Example #24
    def _get_type_of_macro(self, macros, clss):
        for macro in macros:
            # ARGN macros
            if re.match(r'ARG\d', macro):
                macros[macro]['type'] = 'ARGN'
                continue
            # USERN macros are managed in the Config class,
            # so no need to look for them here
            elif re.match(r'_HOST\w', macro):
                macros[macro]['type'] = 'CUSTOM'
                macros[macro]['class'] = 'HOST'
                continue
            elif re.match(r'_SERVICE\w', macro):
                macros[macro]['type'] = 'CUSTOM'
                macros[macro]['class'] = 'SERVICE'
                # value of macro: re.split('_HOST', '_HOSTMAC_ADDRESS')[1]
                continue
            elif re.match(r'_CONTACT\w', macro):
                macros[macro]['type'] = 'CUSTOM'
                macros[macro]['class'] = 'CONTACT'
                continue
            # On-demand macro
            elif len(macro.split(':')) > 1:
                macros[macro]['type'] = 'ONDEMAND'
                continue
            # OK, classical macro...
            for cls in clss:
                if macro in cls.macros:
                    macros[macro]['type'] = 'class'
                    macros[macro]['class'] = cls
                    continue
Example #25
    def _sanitize(self, badKey, badVal):
        valid = True

        # Used for debugging
        if 'csv_line' not in self:
            self['csv_line'] = "-1"

        # Catch bad formatting
        if badKey in self:
            logging.debug("%s: %s", badKey, ''.join(self[badKey]))
            logging.debug("Bad Key")
            valid = False

        if 'last_pymnt_d' in self and re.match(r"^\s*$", self['last_pymnt_d']):
            if 'issue_d' in self:
                # If no payment received, last payment date = issue date
                self['last_pymnt_d'] = self['issue_d']

        for k, v in self.items():
            if badVal == v:
                logging.debug(badVal)
                valid = False
                break

            # Replace empties with 0s
            if re.match(r'^\s*$', str(v)):
                self[k] = 0

        if not valid:
            logging.debug(self.items())
            # Can't safely access specific keys, other than id, when incorrectly formatted
            logging.warning("Fix Loan {}".format(self['id']))
            logging.warning("Line {}".format(self['csv_line']))

        return valid
Example #26
def _strip_and_unquote( keys, value ):
    if value[:3] == "'''":
        m = re.match( _MULTI_LINE_SINGLE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )

    elif value[:3] == '"""':
        m = re.match( _MULTI_LINE_DOUBLE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )

    elif value[0] == '"':
        m = re.match( _DQ_VALUE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )

    elif value[0] == "'":
        m = re.match( _SQ_VALUE, value )
        if m:
            value = m.groups()[0]
        else:
            raise IllegalValueError( "string", keys, value )
    else:
        # unquoted
        value = re.sub( r'\s*#.*$', '', value )

    # Note strip() removes leading and trailing whitespace, including
    # initial newlines on a multiline string:
    return value.strip()
Example #27
def replace_links_with_text(html):
    """any absolute links will be replaced with the
    url in plain text, same with any img tags
    """
    soup = BeautifulSoup(html, 'html5lib')
    abs_url_re = r'^http(s)?://'

    images = soup.find_all('img')
    for image in images:
        url = image.get('src', '')
        text = image.get('alt', '')
        if url == '' or re.match(abs_url_re, url):
            image.replaceWith(format_url_replacement(url, text))

    links = soup.find_all('a')
    for link in links:
        url = link.get('href', '')
        text = ''.join(link.text) or ''

        if text == '':  # this is due to an issue with url inlining in comments
            link.replaceWith('')
        elif url == '' or re.match(abs_url_re, url):
            link.replaceWith(format_url_replacement(url, text))

    return force_text(soup.find('body').renderContents(), 'utf-8')
Example #28
    def __load_book_menu(self, lines):
        # r1 matches the "目录" ("table of contents") heading
        r1 = re.compile(u'^\s*目\s*录\s*$')
        r2 = re.compile(u'^\s*([^·…]+)\s*[·.…]{2,}\s*([l\d]+)\s*$')
        menus = {}
        start = False
        not_match = 0
        for line in lines:
            words = line.decode(self.default_coding)
            words = words.strip('\n')  # strip() returns a new string; keep the result
            if r1.match(words):
                start = True
                continue
            elif start:
                m = r2.match(words)
                if m:
                    title = m.group(1)
                    page = m.group(2)
                    page = page.replace('l', '1')  # common OCR error: 'l' in place of '1'
                    page = int(page.encode(self.default_coding))
                    menus[page] = self.__get_simple_string(title)
                    not_match = 0
                else:
                    not_match += 1
                    if not_match > 10:
                        break
        return menus
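
A hedged check of the table-of-contents pattern above, including the 'l' to '1' OCR fix-up; the sample line is made up:

import re

r2 = re.compile(r'^\s*([^·…]+)\s*[·.…]{2,}\s*([l\d]+)\s*$')
m = r2.match('第一章 ……… l2')
print(m.group(1).strip(), int(m.group(2).replace('l', '1')))  # -> 第一章 12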
Example #29
import os
import re


def create_filetree(path=None, depth=0, max_depth=0):

    tree = None

    if max_depth == 0 or depth < max_depth:
        if path is None:
            path = os.getcwd()

        tree = dict(name=os.path.basename(path), children=[])

        try:
            lst = os.listdir(path)
        except OSError:
            pass  # ignore errors
        else:
            for name in lst:
                fn = os.path.join(path, name)
                if (os.path.isdir(fn) and
                        re.match('^.*(Compiled)$', fn) is None):
                    child = create_filetree(fn, depth + 1, max_depth)
                    if child is not None:
                        tree['children'].append(child)
                elif re.match(r'^.*\.(m|def|txt|csv)$', fn) is not None:
                    tree['children'].append(dict(name=fn.replace(
                        os.getcwd() + os.path.sep, "")))

    return tree
Example #30
import re

def process_line_exceptions(line, extra_tags):
    global except_base_tag

    if ' ' not in line or re.match('.*[а-яіїєґ]/.*', line):
        return line
    if re.match('^[^ ]+ [^ ]+ [^:]?[a-z].*$', line):
        return line

    if line.startswith('# !'):
        except_base_tag = re.findall('![a-z:-]+', line)[0][1:] + ':'
        return ''

    base = re.findall('^[^ ]+', line)[0]

    except_base_tag2 = except_base_tag
    if base.endswith('ся'):  # 'ся' marks a reflexive verb
        except_base_tag2 = except_base_tag.replace('verb:', 'verb:rev:')

    out_line = re.sub('([^ ]+) ?', '\\1 ' + base + ' ' + except_base_tag2 + 'unknown' + extra_tags + '\n', line)

    if except_base_tag in ('verb:imperf:', 'verb:perf:'):
        # insert "inf:" into the tag of the first (lemma) line only
        out_line = re.sub("(verb:(?:rev:)?)((im)?perf:)", "\\1inf:\\2", out_line, 1)

        out_lines = out_line.split('\n')
        out_lines[0] = out_lines[0].replace(':unknown', '')
        out_line = '\n'.join(out_lines)

    return out_line[:-1]