Exemple #1
0
def _check_hostname(name):
    _check_endpoint_type(name)
    hostname_checker = _compile(r'^http(s)?://[a-zA-Z0-9-_\.]+(:[0-9]+)?$')

    if not hostname_checker.match(name):
        raise ValueError('endpoint name {} should be of the form http(s)://<hostname>[:<port>] and the hostname may'
                         ' consist only of: a-z, A-Z, 0-9, underscores, hyphens and dots.'.format(name))
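
A quick sanity check of the pattern above (the hostnames are hypothetical; assumes from re import compile as _compile):

hostname_checker = _compile(r'^http(s)?://[a-zA-Z0-9-_\.]+(:[0-9]+)?$')
print(bool(hostname_checker.match('https://my-host.example.com:8080')))  # True
print(bool(hostname_checker.match('ftp://my-host')))                     # False: wrong scheme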
Exemple #2
0
def convert(rome):
	"""Accepts a Roman number in string format and returns an integer hoding the value it represents
		
		The regex search checks in portions if a part of the string is can be a Roman number
		The organization of the math groups also checks for the plausability of the number
		in that a Roman code for hundreds does not come after that of tens.
		
		This above is achieved by checking for thousands, hundreds, tens and then ones in order.
		
		The program idea was obtained from Mark Pilgrim's "Dive Into Python" book but
		has been tweaked just a bit for re-usability and effectiveness.
		
		The Exception arguments especially for those raised after assessment of the "offender" argument
		describe concisely what the probem with the provided Roman number is.
	"""
	match = _compile(r"^\s*(M{,})(CM|CD|D?C{,})(XC|XL|L?X{,})(IX|IV|V?I{,})\s*$").search(rome.upper())
	
	if not match:
		raise InvalidRoman("{} is not a valid Roman number.".format(rome))
	
	offenders = [g for g in match.groups() if len(g) > 3]
	if offenders:
		if 'M' in offenders[0]:
			# only numbers below 4000 are supported, hence the different message
			raise InvalidRoman("{} is not a supported Roman number.".format(rome))
		else:
			raise InvalidRoman("{} is not a valid Roman number.".format(rome))
	
	return __assess(match.groups())
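
To see how the groups line up, the same pattern can be run standalone on a hypothetical input (without InvalidRoman or __assess):

from re import compile as _compile
pattern = _compile(r"^\s*(M{0,})(CM|CD|D?C{0,})(XC|XL|L?X{0,})(IX|IV|V?I{0,})\s*$")
print(pattern.search("MCMXCIV").groups())  # ('M', 'CM', 'XC', 'IV') -> 1000 + 900 + 90 + 4 = 1994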
Exemple #3
0
def import_raw_svg(raw_svg=maps_dir + "raw_svg/core.svg"):
    """parse an svg from the patched version of bigg and save it in the
    maps directory"""
    non_empty = _compile(".")
    with open(raw_svg) as infile:
        svg = _SVGsoup(infile)
    rxn_layer = svg.findChild(name="g", id="Layer_rxn")
    met_layer = svg.findChild(name="g", id="Layer_met")
    for svg_rxn in rxn_layer.findChildren(name="g", recursive=False):
        del(svg_rxn["stroke"])
        del(svg_rxn.a["xlink:href"])
        for path in svg_rxn.findChildren(name="path", attrs={"marker-end": non_empty}):
            del(path["marker-end"])
            path["class"] = "end"
        for path in svg_rxn.findChildren(name="path", attrs={"marker-start": non_empty}):
            del(path["marker-start"])
            path["class"] = "start"
    for met_rxn in met_layer.findChildren(name="g", recursive=False):
        del(met_rxn.a["xlink:href"])
    rxn_colors = _Tag(svg, name="style")
    rxn_colors["id"] = "object_styles"
    svg.defs.append(rxn_colors)
    # write the processed file out to the maps directory
    with open(maps_dir + _path.split(raw_svg)[1], "w") as outfile:
        outfile.write(str(svg))
Exemple #4
0
 def apply_solution(self, flux_dict, color_map=default_color_map):
     self.object_styles.clear()
     fluxes = dict((i, flux_dict[i]) for i in self.included_reactions.intersection(flux_dict))
     abs_fluxes = [min(abs(i), 20) for i in fluxes.itervalues()]
     x_min = min(abs_fluxes)
     x_max = max(abs_fluxes)
     scale_func = lambda value: min(1, (abs(value) - x_min) / (x_max - x_min) * 3)
     for reaction, value in fluxes.iteritems():
         #t = _Tag(name="title")
         #t.string = "%.2f" % (value)
         self._rxn_layer.findChild("g", id=reaction).title.string += "\n%.2f" % (value)#append(t)
         try:
             t = _Tag(name="title")
             t.string = "%.2f" % (value)
             self._rxn_label_layer.findChild(name="text", text=_compile(reaction)).append(t)
         except Exception:
             pass
         if str(reaction) in self.included_reactions:
             self.set_object_color(str(reaction), color_map(scale_func(value)))
         if value < 0:
             self.object_styles["%s .end" % str(reaction)] = {"marker-end": "none"}
         if value > 0:
             self.object_styles["%s .start" % str(reaction)] = {"marker-start": "none"}
     for reaction in self.included_reactions.difference(flux_dict.keys()):
         self.set_object_color(reaction, (0, 0, 0))
     self._update_svg()
     return self
Exemple #5
0
 def _get_new_urls(self, page_url, soup):
     new_urls = set()
     links = soup.find_all('a', href=re.compile(r"/view/\d+\.htm"))
     for link in links:
         new_url = link['href']
         new_full_url = urlparse.urljoin(page_url, new_url)
         new_urls.add(new_full_url)
     return new_urls
Exemple #6
0
def get_urls(url_str):
    x = urlopen(url_str)
    list_str = x.read().decode('utf-8', 'replace')  # urlopen returns bytes; decode so the str pattern can match
    y = _compile(
        r'''<a\s+href=(?P<quote>["'])?(?P<obf>[^"']+)(?(quote)(?P=quote))[^>]*?>''', IGNORECASE | VERBOSE)
    urls_M = []
    for i in y.finditer(list_str):
        urls_M.append(i.group('obf'))
    return urls_M
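
The (?(quote)(?P=quote)) conditional group only demands a closing quote when an opening one was captured, so quoted and unquoted href values both match. A minimal sketch with hypothetical markup:

from re import compile as _compile, IGNORECASE, VERBOSE
pat = _compile(
    r'''<a\s+href=(?P<quote>["'])?(?P<obf>[^"']+)(?(quote)(?P=quote))[^>]*?>''', IGNORECASE | VERBOSE)
print(pat.search('<A HREF="/index.html">').group('obf'))  # /index.html
print(pat.search('<a href=/plain>').group('obf'))         # /plain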
Exemple #7
0
 def _assertRegex(self, text, expected_regexp, msg=None):
     """Fail the test unless the text matches the regular
     expression.
     """
     if isinstance(expected_regexp, _basestring):
         expected_regexp = _compile(expected_regexp)
     if not expected_regexp.search(text):
         msg = msg or "Regexp didn't match"
         msg = '%s: %r not found in %r' % (msg, expected_regexp.pattern, text)
         raise self.failureException(msg)
Exemple #8
0
    def enableBranches(self, branches):
        '''
        Add one or more items to the list of branches that are needed for this 
        set of cuts.
        '''
        if isinstance(branches, str):
            branches = [branches]

        for b in branches:
            self.branchesNeeded.append(_compile(b))
Exemple #9
0
 def _assertNotRegex(self, text, unexpected_regexp, msg=None):
     """Fail the test if the text matches the regular
     expression.
     """
     if isinstance(unexpected_regexp, _basestring):
         unexpected_regexp = _compile(unexpected_regexp)
     match = unexpected_regexp.search(text)
     if match:
         msg = msg or 'Regexp matched'
         msg = '%s: %r matches %r in %r' % (msg,
                                            text[match.start():match.end()],
                                            unexpected_regexp.pattern,
                                            text)
         raise self.failureException(msg)
Exemple #10
0
def init(dllpath = None, root = "C:\\", bypass_check=False):
    """ Initialize the underlying tos-databridge DLL

    dllpath: string of the exact path of the DLL
    root: string of the directory to start walking/searching to find the DLL
    """  
    global _dll
    rel = set()
    if not bypass_check and dllpath is None and root == "C:\\":
        if abort_init_after_warn():
            return
    try:
        if dllpath is None:
            matcher = _partial( _match, _REGEX_DLL_NAME)  # regex match function
            for nfile in map( matcher, _listdir( _curdir )):
                if nfile: # try the current dir first             
                    rel.add( _curdir+ _sep + nfile.string )                    
            if not rel:                
                for root,dirs, files in _walk(root): # no luck, walk the dir tree 
                    for file in map( matcher, files):  
                        if file:                           
                            rel.add( root + _sep + file.string )                           
                if not rel: # if still nothing throw
                    raise TOSDB_Error(" could not locate DLL")    
            if len(rel) > 1:  # only use the most recent version(s)
                ver = _compile(r'-[\d]{1,2}\.[\d]{1,2}-')
                vers = tuple( zip( map( 
                    lambda x: _search(ver,x).group().strip('-'), rel), rel) )
                vers_max = max(vers)[0].split('.')[0]
                mtup = tuple( (x[0].split('.')[1],x[1]) 
                              for x in vers if x[0].split('.')[0] == vers_max)         
                mtup_max = max(mtup)[0]
                rel = set( x[1] for x in mtup if x[0] == mtup_max )                      
            # find the most recently updated
            d = dict( zip(map( lambda x : _stat(x).st_mtime, rel), rel ) )
            rec = max(d)
            dllpath = d[ rec ]
        _dll = _WinDLL( dllpath )
        print( "+ Using Module ", dllpath )
        print( "+ Last Update ", _asctime(_localtime(_stat(dllpath).st_mtime)))
        if connect():
            print("+ Successfully Connected to Service \\ Engine")
        else:
            print("- Failed to Connect to Service \\ Engine")
        return True # indicate the lib was loaded
    except Exception as e:
        raise TOSDB_CLibError( "unable to initialize library", e )        
Exemple #11
0
from django.contrib.sessions.middleware import SessionMiddleware
from django.http import HttpResponseForbidden, HttpResponseRedirect
from amsp import settings
from re import compile as _compile

__author__ = "mohammad"


EXEMPT_URLS = [_compile(settings.LOGIN_URL.lstrip("/"))]
if hasattr(settings, "LOGIN_EXEMPT_URLS"):
    EXEMPT_URLS += [_compile(expr) for expr in settings.LOGIN_EXEMPT_URLS]


class LoginRequiredMiddleware(SessionMiddleware):
    """
    Middleware that requires a user to be authenticated to view any page other
    than LOGIN_URL. Exemptions to this requirement can optionally be specified
    in settings via a list of regular expressions in LOGIN_EXEMPT_URLS (which
    you can copy from your urls.py).

    Requires authentication middleware and template context processors to be
    loaded. You'll get an error if they aren't.
    """

    def process_response(self, request, response):
        assert hasattr(
            request, "user"
        ), "The Login Required middleware\
 requires authentication middleware to be installed. Edit your\
 MIDDLEWARE_CLASSES setting to insert\
 'django.contrib.auth.middleware.AuthenticationMiddleware'. If that doesn't\
 work, ensure your TEMPLATE_CONTEXT_PROCESSORS setting includes\
 'django.core.context_processors.auth'."
			            <a href="?99929___" class="">猴头菇</a>

			            <a href="?99933___" class="">草菇</a>

			            <a href="?99936___" class="">竹笋</a>

			            <a href="?99937___" class="">冬笋</a>

			            <a href="?99938___" class="">黄豆芽</a>

			            <a href="?99939___" class="">绿豆芽</a>

			            <a href="?99940___" class="">玉米棒</a>
"""
res_vage_number = r'<a href="(.*)" class='
res_vage_number_com = re.compile(res_vage_number)
res_vage_name = '<a href=".*" class="">(.*)</a>'
res_vage_name_com = re.compile(res_vage_name)
res_pro_number = r'<a href="(.*)" class='
res_pro_number_com = re.compile(res_pro_number)
res_pro_name = '<a href=".*" class="">(.*)</a>'
res_pro_name_com = re.compile(res_pro_name)
get_data_vage_number = re.findall(res_vage_number_com,str(get_data_vage))
get_data_vage_name = re.findall(res_vage_name_com,str(get_data_vage))
get_data_pro_number = re.findall(res_pro_number_com,str(get_data_pro))
get_data_pro_name = re.findall(res_pro_name_com,str(get_data_pro))
with open(r'C:\Users\Attack\Desktop\vage.txt','w+') as f:#  C:\Users\Attack\Desktop\name_price.txt
    d = dict(zip(get_data_vage_name,get_data_vage_number))
    d = pickle.dumps(d)
    f.write(d)
with open(r'C:\Users\Attack\Desktop\pro.txt','w+') as f:#  C:\Users\Attack\Desktop\name_price.txt
Exemple #13
0
 def update_event(self, inp=-1):
     self.set_output_val(0, re.compile(self.input(0), self.input(1)))
Exemple #14
0
    def _add_or_update_endpoint(self, action, name, version, request_data):
        """
        Add or update an endpoint
        """
        self.logger.log(logging.DEBUG, f"Adding/updating model {name}...")

        _name_checker = _compile(r"^[a-zA-Z0-9-_\s]+$")
        if not isinstance(name, str):
            msg = "Endpoint name must be a string"
            self.logger.log(logging.CRITICAL, msg)
            raise TypeError(msg)

        if not _name_checker.match(name):
            raise gen.Return("endpoint name can only contain: a-z, A-Z, 0-9,"
                             " underscore, hyphens and spaces.")

        if self.settings.get("add_or_updating_endpoint"):
            msg = ("Another endpoint update is already in progress"
                   ", please wait a while and try again")
            self.logger.log(logging.CRITICAL, msg)
            raise RuntimeError(msg)

        request_uuid = random_uuid()
        self.settings["add_or_updating_endpoint"] = request_uuid
        try:
            description = (request_data["description"]
                           if "description" in request_data else None)
            if "docstring" in request_data:
                docstring = str(
                    bytes(request_data["docstring"],
                          "utf-8").decode("unicode_escape"))
            else:
                docstring = None
            endpoint_type = request_data[
                "type"] if "type" in request_data else None
            methods = request_data[
                "methods"] if "methods" in request_data else []
            dependencies = (request_data["dependencies"]
                            if "dependencies" in request_data else None)
            target = request_data[
                "target"] if "target" in request_data else None
            schema = request_data[
                "schema"] if "schema" in request_data else None

            src_path = request_data[
                "src_path"] if "src_path" in request_data else None
            target_path = get_query_object_path(
                self.settings[SettingsParameters.StateFilePath], name, version)
            self.logger.log(logging.DEBUG,
                            f"Checking source path {src_path}...")
            _path_checker = _compile(r"^[\\\:a-zA-Z0-9-_~\s/\.\(\)]+$")
            # copy from staging
            if src_path:
                if not isinstance(request_data["src_path"], str):
                    raise gen.Return("src_path must be a string.")
                if not _path_checker.match(src_path):
                    raise gen.Return(
                        "Endpoint source path name can only contain: "
                        "a-z, A-Z, 0-9, underscore, hyphens and spaces.")

                yield self._copy_po_future(src_path, target_path)
            elif endpoint_type != "alias":
                raise gen.Return("src_path is required to add/update an "
                                 "endpoint.")

            # alias special logic:
            if endpoint_type == "alias":
                if not target:
                    raise gen.Return("Target is required for alias endpoint.")
                dependencies = [target]

            # update local config
            try:
                if action == "add":
                    self.tabpy_state.add_endpoint(
                        name=name,
                        description=description,
                        docstring=docstring,
                        endpoint_type=endpoint_type,
                        methods=methods,
                        dependencies=dependencies,
                        target=target,
                        schema=schema,
                    )
                else:
                    self.tabpy_state.update_endpoint(
                        name=name,
                        description=description,
                        docstring=docstring,
                        endpoint_type=endpoint_type,
                        methods=methods,
                        dependencies=dependencies,
                        target=target,
                        schema=schema,
                        version=version,
                    )

            except Exception as e:
                raise gen.Return(f"Error when changing TabPy state: {e}")

            on_state_change(self.settings, self.tabpy_state,
                            self.python_service, self.logger)

        finally:
            self.settings["add_or_updating_endpoint"] = None
Exemple #15
0
# -*- coding: utf-8 -*-
'''
@version:python2.7.11
@author:Attack
@time: 2016/2/17 001719:54
'''
import re
import urllib2
import sys
url = 'http://jiage.shucaiyuan.com/'
html = urllib2.urlopen(url)
data = html.read()
res_number = r'<a href="(.*)" class='
res_number_com = re.compile(res_number)
res_name = '<a href=".*" class="">(.*)</a>'
res_name_com = re.compile(res_name)
get_data_number = re.findall(res_number_com,data)
get_data_name = re.findall(res_name_com,data)
new =[]
for i in get_data_name:
    i=i.decode('utf-8')  # decode the utf-8 bytes so the names display as Chinese
    new.append(i)
d=dict(zip(new,get_data_number))
reload(sys)            # reload sys to get setdefaultencoding back (site.py deletes it)
sys.setdefaultencoding("utf-8")
with open(r'C:\Users\liuyunhai\Desktop\name_price.txt','r+') as f: #  C:\Users\Attack\Desktop\name_price.txt
    for k,v in d.items():
        key = str(k)+':'+str(v)+'\n'
        f.write(key)
Exemple #16
0
from urllib.parse import (urlparse, urljoin)
from html import unescape
from re import compile as _compile

from requests import RequestException

from .request import request
from .status import Status
from .exceptions import (
    DeadlinksIgnoredURL,
    DeadlinksRedirectionURL,
)

# -- Constants -----------------------------------------------------------------

__RE_LINKS__ = _compile(r'<a\s{1}([^>]+)>') # pylint: disable=W1401

# filters
CLEANER = lambda x: x.strip("\"'\n ") # removes quotes, spaces and newlines
ANCHORS = lambda x: x.split("#")[0] # removes the part after the anchor
UNESCPE = lambda x: unescape(x) # pylint: disable=W0108


class URL:
    """ URL abstraction representation. """

    def __init__(self, location: str) -> None:
        # print(urlparse(location))
        self._url = urlparse(location)
        self._status = Status.UNDEFINED # type: Status
Exemple #17
0
# encoding:utf-8
#! python3
# renameDates.py - Rename filenames with American MM-DD-YYYY date format to Chinese YYYY-MM-DD
import re,shutil,os
workdir='C:\\Temp\\'
#Create a regex that matches files with American date format.
datePattern=re.compile(r'''^(.*?)      # all text before the date
((0|1)?\d)-                            # One or two digits for the month
((0|1|2|3)?\d)-                        # One or two digits for the day
((19|20)\d\d)                          # Four digits for the year
(.*?)$                                 # all text after the date 
''',re.VERBOSE)  # VERBOSE lets the regex contain whitespace and comments, improving readability
# TODO:Loop over the files in the working directory
for amerFilename in os.listdir(workdir):
    mo=datePattern.search(amerFilename)
# TODO:Skip files without a date.
    if mo is None:
        continue
# TODO: Get different parts of the filename.
    beforePart=mo.group(1)
    monthPart=mo.group(2)
    dayPart=mo.group(4)
    yearPart=mo.group(6)
    afterPart=mo.group(8)
# TODO: Form the Chinese-style filename.
    chnFilename=beforePart+yearPart+'-'+monthPart+'-'+dayPart+afterPart    
# TODO: Get the full,absolute file paths.
    amerFilename=os.path.join(workdir,amerFilename)
    chnFilename=os.path.join(workdir,chnFilename)
# TODO: Rename the files.
    print('Renaming %s to %s...'%(amerFilename,chnFilename))
Exemple #18
0
     listdir as _listdir, sep as _sep
from re import compile as _compile, search as _search, match as _match, \
     split as _split
from ctypes import WinDLL as _WinDLL, cast as _cast, pointer as _pointer, \
     create_string_buffer as _BUF_, POINTER as _PTR_, c_double as _double_, \
     c_float as _float_, c_ulong as _ulong_, c_long as _long_, \
     c_longlong as _longlong_, c_char_p as _str_, c_char as _char_, \
     c_ubyte as _uchar_, c_int as _int_, c_void_p as _pvoid_, c_uint as _uint_
_pchar_ = _PTR_(_char_)
_ppchar_ = _PTR_(_pchar_)

DLL_BASE_NAME = "tos-databridge"
SYS_ARCH_TYPE = "x64" if (_log(_maxsize * 2, 2) > 33) else "x86"
MIN_MARGIN_OF_SAFETY = 10

_REGEX_NON_ALNUM = _compile("[\W+]")
_REGEX_DLL_NAME = _compile('^(' + DLL_BASE_NAME + '-)[\d]{1,2}.[\d]{1,2}-' +
                           SYS_ARCH_TYPE + '(.dll)$')

_dll = None

### we added a lock to the _call from VTOSDB_DataBlock
### how do we want to handle concurrent calls at this level ???


def init(dllpath=None, root="C:\\", bypass_check=False):
    """ Initialize the underlying tos-databridge DLL

    dllpath: string of the exact path of the DLL
    root: string of the directory to start walking/searching to find the DLL
    """
Exemple #19
0
    GithubRateLimitException,
    GithubRateLimitWarning,
)
from airfs.storage.http import (
    HTTPRawIO as _HTTPRawIO,
    HTTPBufferedIO as _HTTPBufferedIO,
)

__all__ = [
    "GithubRateLimitException",
    "GithubRateLimitWarning",
    "GithubRawIO",
    "GithubBufferedIO",
]

_RAW_GITHUB = _compile(r"^https?://raw\.githubusercontent\.com")


class _GithubSystem(_SystemBase):
    """
    GitHub system.

    Args:
        storage_parameters (dict): "github.MainClass.Github" keyword arguments.
    """

    SUPPORTS_SYMLINKS = True

    _SIZE_KEYS = (
        "size",
        "Content-Length",
Exemple #20
0
def GetUrl2(html):
    RegUrl = r'<a href="(.*?)" title'
    Urlre = re.compile(RegUrl)
    Url2List = ['http://tieba.baidu.com' + u for u in re.findall(Urlre, html)]
    return Url2List
Exemple #21
0
        n = self.name
        if _path.exists(n):
            n = _path.basename(n)
        return DataHolder([(n, d)], warn=warn)


class NumPySaver(PythonSaver):
    def save(self, data):
        import numpy as np  #@UnresolvedImport
        if len(list(data.items())) > 1:
            print('Only saving first dataset')
        np.save(self.name, data[0])


from re import compile as _compile
_begin_number = _compile(r'^[-+]?[\d]?\.?\d')


class SRSLoader(PythonLoader):
    '''
    Loads an SRS dat file and returns a dataholder object
    '''
    def load(self, warn=True):
        '''
        warn        -- if True (default), print warnings about key names

        Returns a DataHolder object
        '''

        f = open(self.name)
Exemple #22
0
    def _add_or_update_endpoint(self, action, name, version, request_data):
        '''
        Add or update an endpoint
        '''
        logging.debug("Adding/updating model {}...".format(name))
        _name_checker = _compile('^[a-zA-Z0-9-_\\s]+$')
        if not isinstance(name, (str, unicode)):
            log_and_raise("Endpoint name must be a string or unicode",
                          TypeError)

        if not _name_checker.match(name):
            raise gen.Return('endpoint name can only contain: a-z, A-Z, 0-9,'
                             ' underscore, hyphens and spaces.')

        if self.settings.get('add_or_updating_endpoint'):
            log_and_raise(
                "Another endpoint update is already in progress"
                ", please wait a while and try again", RuntimeError)

        request_uuid = random_uuid()
        self.settings['add_or_updating_endpoint'] = request_uuid
        try:
            description = (request_data['description']
                           if 'description' in request_data else None)
            if 'docstring' in request_data:
                if sys.version_info > (3, 0):
                    docstring = str(
                        bytes(request_data['docstring'],
                              "utf-8").decode('unicode_escape'))
                else:
                    docstring = request_data['docstring'].decode(
                        'string_escape')
            else:
                docstring = None
            endpoint_type = (request_data['type']
                             if 'type' in request_data else None)
            methods = (request_data['methods']
                       if 'methods' in request_data else [])
            dependencies = (request_data['dependencies']
                            if 'dependencies' in request_data else None)
            target = (request_data['target']
                      if 'target' in request_data else None)
            schema = (request_data['schema']
                      if 'schema' in request_data else None)

            src_path = (request_data['src_path']
                        if 'src_path' in request_data else None)
            target_path = get_query_object_path(
                self.settings['state_file_path'], name, version)
            _path_checker = _compile(r'^[\\a-zA-Z0-9-_\s/]+$')
            # copy from staging
            if src_path:
                if not isinstance(request_data['src_path'], (str, unicode)):
                    raise gen.Return("src_path must be a string.")
                if not _path_checker.match(src_path):
                    raise gen.Return('Endpoint source path can only contain: '
                                     'a-z, A-Z, 0-9, underscore, hyphens and spaces.')

                yield self._copy_po_future(src_path, target_path)
            elif endpoint_type != 'alias':
                raise gen.Return("src_path is required to add/update an "
                                 "endpoint.")

            # alias special logic:
            if endpoint_type == 'alias':
                if not target:
                    raise gen.Return('Target is required for alias endpoint.')
                dependencies = [target]

            # update local config
            try:
                if action == 'add':
                    self.tabpy_state.add_endpoint(name=name,
                                                  description=description,
                                                  docstring=docstring,
                                                  endpoint_type=endpoint_type,
                                                  methods=methods,
                                                  dependencies=dependencies,
                                                  target=target,
                                                  schema=schema)
                else:
                    self.tabpy_state.update_endpoint(
                        name=name,
                        description=description,
                        docstring=docstring,
                        endpoint_type=endpoint_type,
                        methods=methods,
                        dependencies=dependencies,
                        target=target,
                        schema=schema,
                        version=version)

            except Exception as e:
                raise gen.Return("Error when changing TabPy state: %s" % e)

            on_state_change(self.settings, self.tabpy_state,
                            self.python_service)

        finally:
            self.settings['add_or_updating_endpoint'] = None
Exemple #23
0
def workload_parser(workload_line, attrs=None, avoid_data_tokens=[';']):
    """ 
    
        Attributes of each workload line in a SWF format (separated by space):
        
        1. job_number -- a counter field, starting from 1.
        2. submit_time -- in seconds. The earliest time the log refers to is zero, and is usually the submittal time of the first job. The lines in the log are sorted by ascending submittal times. It makes sense for jobs to also be numbered in this order.
        3. wait_time -- in seconds. The difference between the job's submit time and the time at which it actually began to run. Naturally, this is only relevant to real logs, not to models.
        4. duration -- in seconds. The wall clock time the job was running (end time minus start time).
        5. allocated_processors -- an integer. In most cases this is also the number of processors the job uses; if the job does not use all of them, we typically don't know about it.
        6. avg_cpu_time -- Time Used for both user and system, in seconds. This is the average over all processors of the CPU time used, and may therefore be smaller than the wall clock runtime. If a log contains the total CPU time used by all the processors, it is divided by the number of allocated processors to derive the average.
        7. used_memory -- in kilobytes. This is again the average per processor.
        8. requested_number_processors --- Requested Number of Processors.
        9. requested_time -- This can be either runtime (measured in wallclock seconds), or average CPU time per processor (also in seconds) -- the exact meaning is determined by a header comment. In many logs this field is used for the user runtime estimate (or upper bound) used in backfilling. If a log contains a request for total CPU time, it is divided by the number of requested processors.
        10. requested_memory -- Requested memory in kilobytes per processor.
        11. status -- 1 if the job was completed, 0 if it failed, and 5 if cancelled. If information about checkpointing or swapping is included, other values are also possible. See usage note below. This field is meaningless for models, so would be -1.
        12. user_id -- a natural number, between one and the number of different users.
        13. group_id -- a natural number, between one and the number of different groups. Some systems control resource usage by groups rather than by individual users.
        14. executable_number -- a natural number, between one and the number of different applications appearing in the workload. in some logs, this might represent a script file used to run jobs rather than the executable directly; this should be noted in a header comment.
        15. queue_number -- a natural number, between one and the number of different queues in the system. The nature of the system's queues should be explained in a header comment. This field is where batch and interactive jobs should be differentiated: we suggest the convention of denoting interactive jobs by 0.
        16. partition_number -- a natural number, between one and the number of different partitions in the systems. The nature of the system's partitions should be explained in a header comment. For example, it is possible to use partition numbers to identify which machine in a cluster was used.
        17. preceding_job_number -- this is the number of a previous job in the workload, such that the current job can only start after the termination of this preceding job. Together with the next field, this allows the workload to include feedback as described below.
        18. think_time_prejob -- this is the number of seconds that should elapse between the termination of the preceding job and the submittal of this one.
        
        :param workload_line: A Line of the workload file
        :param attrs: List of attributes to be considered. Default None, all attributes will be considered.
        :param avoid_data_tokens: List of tokens to avoid the line
        
        :return: A dictionary with the requested attributes. If the raw line is returned instead, it started with one of the tokens to avoid.
    
    """
    if workload_line[0] in avoid_data_tokens:
        return workload_line
    _common_int_pattern = (r'\s*(?P<{}>[-+]?\d+)', int)
    _common_float_pattern = (r'\s*(?P<{}>[-+]?\d+\.\d+|[-+]?\d+)', float)
    _dict = {
        'job_number': _common_int_pattern,
        'submit_time': _common_int_pattern,
        'wait_time': _common_int_pattern,
        'duration': _common_int_pattern,
        'allocated_processors': _common_int_pattern,
        'avg_cpu_time': _common_float_pattern,
        'used_memory': _common_int_pattern,
        'requested_number_processors': _common_int_pattern,
        'requested_time': _common_int_pattern,
        'requested_memory': _common_int_pattern,
        'status': _common_int_pattern,
        'user_id': _common_int_pattern,
        'group_id': _common_int_pattern,
        'executable_number': _common_int_pattern,
        'queue_number': _common_int_pattern,
        'partition_number': _common_int_pattern,
        'preceding_job_number': _common_int_pattern,
        'think_time_prejob': _common_int_pattern
    }
    _sequence = _dict.keys() if not attrs else ((
        attrs, ) if isinstance(attrs, str) else attrs)
    reg_exp = r''
    for _key in _sequence:
        reg_exp += _dict[_key][0].format(_key)
    p = _compile(reg_exp)
    _matches = p.match(workload_line)
    _dict_line = _matches.groupdict()
    return {key: _dict[key][1](_dict_line[key]) for key in _sequence}
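
A minimal sketch of the same pattern-building idea on a hypothetical SWF fragment, using just the first two fields:

from re import compile as _compile
pat = _compile(r'\s*(?P<job_number>[-+]?\d+)\s*(?P<submit_time>[-+]?\d+)')
print(pat.match('  1  0  53  120').groupdict())  # {'job_number': '1', 'submit_time': '0'}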
Exemple #24
0
    cat *.malt | ./malt2connlX.py > output.conll

NOTE: Beware of nasty Windows newlines:

    dos2unix *.malt

Author:     Pontus Stenetorp    <pontus stenetorp se>
Version:    2011-12-05
"""

from re import compile as _compile
from sys import stdin, stdout

# Constants
MALT_REGEX = _compile(r'^(?P<token>.*?)\t(?P<pos>[^\t]+)\t'
                      r'(?P<head>[^\t]+)\t(?P<rel>[^\t]+)$')
# NOTE: My interpretation from reversing the format by example
OUTPUT_LINE = '{token_num}\t{token}\t_\t{pos}\t{pos}\t_\t{head}\t{rel}\t_\t_'
###
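
# A hypothetical MALT line and its parse (illustrative only):
#   MALT_REGEX.match('dog\tNN\t2\tSBJ').groupdict()
#   -> {'token': 'dog', 'pos': 'NN', 'head': '2', 'rel': 'SBJ'}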


def main(args):
    token_cnt = 0
    for line in (l.decode('utf-8').rstrip('\n') for l in stdin):
        if not line:
            # Done with the sentence
            token_cnt = 0
            stdout.write('\n')
            continue
        else:
            token_cnt += 1
Exemple #25
0
     listdir as _listdir, sep as _sep
from re import compile as _compile, search as _search, match as _match, \
     split as _split
from ctypes import WinDLL as _WinDLL, cast as _cast, pointer as _pointer, \
     create_string_buffer as _BUF_, POINTER as _PTR_, c_double as _double_, \
     c_float as _float_, c_ulong as _ulong_, c_long as _long_, \
     c_longlong as _longlong_, c_char_p as _str_, c_char as _char_, \
     c_ubyte as _uchar_, c_int as _int_, c_void_p as _pvoid_, c_uint as _uint_
_pchar_ = _PTR_( _char_ )
_ppchar_ = _PTR_( _pchar_ )   

DLL_BASE_NAME = "tos-databridge"
SYS_ARCH_TYPE = "x64" if ( _log( _maxsize * 2, 2) > 33 ) else "x86"
MIN_MARGIN_OF_SAFETY = 10

_REGEX_NON_ALNUM = _compile("[\W+]")
_REGEX_DLL_NAME = _compile('^('+DLL_BASE_NAME 
                          + '-)[\d]{1,2}.[\d]{1,2}-'
                          + SYS_ARCH_TYPE +'(.dll)$')
           
_dll = None

### we added a lock to the _call from VTOSDB_DataBlock
### how do we want to handle concurrent calls at this level ???
        
def init(dllpath = None, root = "C:\\", bypass_check=False):
    """ Initialize the underlying tos-databridge DLL

    dllpath: string of the exact path of the DLL
    root: string of the directory to start walking/searching to find the DLL
    """  
Exemple #26
0
with open(r'C:\Users\Attack\Desktop\pro.txt','r+') as f:
    pro_name =pickle.loads(f.read())
with open(r'C:\Users\Attack\Desktop\vage.txt','r+') as f:
    vage_name =pickle.loads(f.read())
root_url = 'http://jiage.shucaiyuan.com/'
url_base,data = [],{}
pro_and_vage_name_base = []
for k,v in pro_name.items():
    for i,x in vage_name.items():
        pro_and_vage_name = k+i
        new_url = root_url+x.replace('___','')+v.strip('?').replace('__','_0_0')  # build the request URL
        url_base.append(new_url)
        pro_and_vage_name_base.append(pro_and_vage_name)
res_mon = '<th>(.*)</th>'
res_pri = '<td>(.*)</td>'
res_mon_com = re.compile(res_mon)
res_pri_com = re.compile(res_pri)
month = []
price = []
for i in xrange(100):
    url,name = url_base[i],pro_and_vage_name_base[i]
    soup = BeautifulSoup(urllib2.urlopen(url), "html.parser")  # parse the page at this URL into a BeautifulSoup object
    month = list(re.findall(res_mon_com, str(soup)))  # extract the months as a list
    price = list(re.findall(res_pri_com, str(soup)))  # extract the prices as a list
    data[name] = dict(zip(month, price))
with open(r'C:\Users\Attack\Desktop\data.txt','w+') as f:
    f.write(pickle.dumps(data))
end = time.clock()
print 'time',end-satrt

Exemple #27
0
def str2date(value):
    return _str2date(value)


str2percent = _str2percent


def isnan(value):
    try:
        return _isnan(value)
    except TypeError:
        return False


# following masks are used to recognize string patterns
FLOAT_MASK = _compile(r'^[-+]?[0-9]\d*\.\d*$|[-+]?\.?[0-9]\d*$')
PERCENT_MASK = _compile(r'^[-+]?[0-9]\d*\.\d*%$|[-+]?\.?[0-9]\d*%$')
INT_MASK = _compile(r'^[-+]?[-0-9]\d*$')
DATE_MASK = _compile(
    '^(?:(?!0000)[0-9]{4}([-/.]?)(?:(?:0?[1-9]|1[0-2])([-/.]?)(?:0?[1-9]|1[0-9]|2[0-8])|(?:0?[13-9]|1[0-2])([-/.]?)(?:29|30)|(?:0?[13578]|1[02])([-/.]?)31)|(?:[0-9]{2}(?:0[48]|[2468][048]|[13579][26])|(?:0[48]|[2468][048]|[13579][26])00)([-/.]?)0?2([-/.]?)29)$'
)
BOOL_MASK = _compile(
    '^(true)|(false)|(yes)|(no)|(\u662f)|(\u5426)|(on)|(off)$')
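
# Illustrative matches for the masks above (sample values are hypothetical):
#   FLOAT_MASK.match('3.14')      -> matches
#   INT_MASK.match('-42')         -> matches
#   DATE_MASK.match('2020-02-29') -> matches (leap day)
#   BOOL_MASK.match('yes')        -> matches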


def auto_str2value(value, dtype=None):
    '''use the masks above to automatically convert a string to the best-matching data type

    Parameters
    ----------
    value : str 
Exemple #28
0
    def parse_detail(self, response):

        item = {}
        offer = CrawlendItem()
        firm = FirmItem()
        soup = bs4.BeautifulSoup(response.body, 'lxml')
        offer['url'] = response.url
        offer['resource'] = '前程无忧'

        # job title and company info
        # 'class' is a Python keyword, so bs4 uses 'class_'
        # strip=True removes surrounding whitespace
        soup_cn = soup.find('div', class_='cn')
        offer['name'] = soup_cn.find('h1').get_text(strip=True)
        offer['work_place'] = soup_cn.find('span', class_='lname').get_text(strip=True)
        firm['work_place'] = offer['work_place']

        # salary
        # regex: [万千] matches '万' (10k) or '千' (1k); (\d+\.?\d*) matches a decimal
        p_salary_1 = re.compile(r'(\d+\.?\d*)-(\d+\.?\d*)[万千]')
        str_salary = soup_cn.find('strong').get_text(strip=True)
        r = re.match(p_salary_1, str_salary)
        # normalize the units
        if r:
            if '万' in str_salary:
                lst_r = [float(i) * 10000 for i in r.groups()]
            else:
                lst_r = [float(i) * 1000 for i in r.groups()]
            offer['salary_from'], offer['salary_to'] = lst_r
        else:
            offer['is_negotiable'] = True

        # company name
        firm['firm_name'] = soup_cn.find('p', class_='cname').get_text(strip=True)
        # company type, scale and industry
        soup_firm_msg = soup_cn.find('p', class_='msg ltype')
        str_msg = soup_firm_msg.string.replace('&nbsp;', '')
        nature, scale, indus = [i.strip() for i in str_msg.split('|')]

        # company ownership type
        # normalize to coded values for storage
        if '外企' in nature:
            firm['firm_nature'] = '2'
        elif '合资' in nature:
            firm['firm_nature'] = '3'
        elif '国企' in nature:
            firm['firm_nature'] = '1'
        elif '民营' in nature:
            firm['firm_nature'] = '4'
        elif '上市' in nature:
            firm['firm_nature'] = '7'
        else:
            firm['firm_nature'] = '5'

        # company scale
        p_scale = re.compile(r'\d+')
        lst_scale = re.findall(p_scale, scale)
        lst_scale = [int(i) for i in lst_scale]
        lst_scale.sort()
        if len(lst_scale) == 2:
            firm['firm_scale_from'], firm['firm_scale_to'] = lst_scale
        elif len(lst_scale) == 1:
            firm['firm_scale_from'] = lst_scale[0]

        # industry
        firm['firm_industry'] = indus

        # job requirements
        soup_job_qua = soup.find('div', class_= 'jtag inbox')
        soup_qua_div = soup_job_qua.find('div', class_='t1')
        soup_qua_span = soup_qua_div.find_all('span')
        # regex for years of experience
        p_exp = re.compile(r'\d+')
        # regex for the posting date
        p_date = re.compile(r'\d+-\d+')
        today = datetime.date.today()
        year_ = today.year
        for span in soup_qua_span:
            text = span.get_text(strip=True)
            # experience
            if '经验' in text:
                r_exp = re.findall(p_exp, text)
                r_exp.sort()  # sort ascending
                if len(r_exp) == 2:
                    offer['years_of_work_from'], offer['years_of_work_to'] = r_exp
                elif len(r_exp) == 1:
                    offer['years_of_work_from'] = r_exp[0]
            # education level
            if '高中' in text:
                offer['degree'] = '1'
            elif '大专' in text:
                offer['degree'] = '2'
            elif '本科' in text:
                offer['degree'] = '3'
            elif '研究生' in text:
                offer['degree'] = '4'
            elif '博士' in text:
                offer['degree'] = '5'
            # number of openings
            if '招聘' in text:
                r_mem = re.findall(p_exp, text)
                if len(r_mem) == 1:
                    offer['member'] = r_mem[0]
            # posting date
            if '发布' in text:
                r_date = re.findall(p_date, text)
                if r_date:
                    date_ = str(year_) + '-' + r_date[0]
                    offer['release'] = datetime.datetime.strptime(date_, "%Y-%m-%d").date()

        offer.setdefault('degree', '6')
        offer.setdefault('release', today)
        # job perks
        soup_r = soup_job_qua.find('p', class_='t2')
        if soup_r:
            offer['temptation'] = soup_r.get_text(';', strip=True)
        soup_qua = soup.find('div', class_='bmsg job_msg inbox')
        # job description
        offer['description'] = soup_qua.get_text(strip=True).replace('举报', '').replace('分享', '')

        # company address
        soup_loc_div = soup.find('div', class_='bmsg inbox')
        soup_loc_p = soup_loc_div.find('p', class_='fp')
        firm['firm_location'] = [i for i in soup_loc_p.stripped_strings][-1]

        # company introduction
        soup_intro = soup.find('div', class_='tmsg inbox')
        firm['firm_introduction'] = soup_intro.get_text(strip=True)

        item['offer'] = offer
        item['firm'] = firm
        yield item
Exemple #29
0
    RESTServiceClient as _RESTServiceClient,
    Endpoint as _Endpoint,
    AliasEndpoint as _AliasEndpoint,
)

from .custom_query_object import CustomQueryObject as \
    _CustomQueryObject

import os as _os


import logging as _logging
_logger = _logging.getLogger(__name__)


_name_checker = _compile(r'^[a-zA-Z0-9-_\ ]+$')

if sys.version_info.major == 3:
    unicode = str


def _check_endpoint_name(name):
    """Checks that the endpoint name is valid by comparing it with an RE and
    checking that it is not reserved."""
    if not isinstance(name, (str,unicode)):
        raise TypeError("Endpoint name must be a string or unicode")

    if name == '':
        raise ValueError("Endpoint name cannot be empty")

    if not _name_checker.match(name):
Exemple #30
0
        d = np.load(self.name)
        import os.path as _path
        n = self.name
        if _path.exists(n):
            n = _path.basename(n)
        return DataHolder([(n, d)], warn=warn)

class NumPySaver(PythonSaver):
    def save(self, data):
        import numpy as np #@UnresolvedImport
        if len(data.items()) > 1:
            print 'Only saving first dataset'
        np.save(self.name, data[0])

from re import compile as _compile
_begin_number = _compile(r'^[-+]?[\d]?\.?\d')

class SRSLoader(PythonLoader):
    '''
    Loads an SRS dat file and returns a dataholder object
    '''
    def load(self, warn=True):
        '''
        warn        -- if True (default), print warnings about key names

        Returns a DataHolder object
        '''

        f = open(self.name)
    
        try:
Exemple #31
0
'''
This module provides functionality required for managing endpoint objects in
TabPy. It provides a way to download endpoint files from remote
and then properly cleanup local the endpoint files on update/remove of endpoint
objects.

The local temporary files for TabPy will by default located at
    /tmp/query_objects

'''
import logging
import os
import shutil
from re import compile as _compile

_name_checker = _compile(r'^[a-zA-Z0-9-_\s]+$')


def _check_endpoint_name(name, logger=logging.getLogger(__name__)):
    """Checks that the endpoint name is valid by comparing it with an RE and
    checking that it is not reserved."""
    if not isinstance(name, str):
        msg = 'Endpoint name must be a string'
        logger.log(logging.CRITICAL, msg)
        raise TypeError(msg)

    if name == '':
        msg = 'Endpoint name cannot be empty'
        logger.log(logging.CRITICAL, msg)
        raise ValueError(msg)
Exemple #32
0
    def _add_or_update_endpoint(self, action, name, version, request_data):
        '''
        Add or update an endpoint
        '''
        _name_checker = _compile(r'^[a-zA-Z0-9-_\ ]+$')
        if not isinstance(name, (str,unicode)):
            raise TypeError("Endpoint name must be a string or unicode")

        if not _name_checker.match(name):
            raise gen.Return('endpoint name can only contain: a-z, A-Z, 0-9,'
            ' underscore, hyphens and spaces.')

        if self.settings.get('add_or_updating_endpoint'):
            raise RuntimeError("Another endpoint update is already in progress, "
                                "please wait a while and try again")

        request_uuid = random_uuid()
        self.settings['add_or_updating_endpoint'] = request_uuid
        try:
            description = request_data['description'] if 'description' in request_data else None
            if 'docstring' in request_data:
                if sys.version_info > (3, 0):
                    docstring = str(bytes(request_data['docstring'], "utf-8").decode('unicode_escape'))
                else:
                    docstring = request_data['docstring'].decode('string_escape')
            else:
                docstring = None
            endpoint_type = request_data['type'] if 'type' in request_data else None
            methods = request_data['methods'] if 'methods' in request_data else []
            dependencies = request_data['dependencies'] if 'dependencies' in request_data else None
            target = request_data['target'] if 'target' in request_data else None
            schema = request_data['schema'] if 'schema' in request_data else None

            src_path = request_data['src_path'] if 'src_path' in request_data else None
            target_path = get_query_object_path(self.settings['state_file_path'], name, version)
            _path_checker = _compile(r'^[\\a-zA-Z0-9-_\ /]+$')
            # copy from staging
            if src_path:
                if not isinstance(request_data['src_path'], (str,unicode)):
                    raise gen.Return("src_path must be a string.")
                if not _path_checker.match(src_path):
                    raise gen.Return('Endpoint source path can only contain: a-z, A-Z, 0-9, underscore, hyphens and spaces.')

                yield self._copy_po_future(src_path, target_path)
            elif endpoint_type != 'alias':
                raise gen.Return("src_path is required to add/update an endpoint.")

            # alias special logic:
            if endpoint_type == 'alias':
                if not target:
                    raise gen.Return('Target is required for alias endpoint.')
                dependencies = [target]

            # update local config
            try:
                if action == 'add':
                    self.tabpy.add_endpoint(
                        name=name,
                        description=description,
                        docstring=docstring,
                        endpoint_type=endpoint_type,
                        methods=methods,
                        dependencies=dependencies,
                        target=target,
                        schema=schema)
                else:
                    self.tabpy.update_endpoint(
                        name=name,
                        description=description,
                        docstring=docstring,
                        endpoint_type=endpoint_type,
                        methods=methods,
                        dependencies=dependencies,
                        target=target,
                        schema=schema,
                        version=version)

            except Exception as e:
                raise gen.Return("Error when changing TabPy state: %s" % e)

            on_state_change(self.settings)

        finally:
            self.settings['add_or_updating_endpoint'] = None
Exemple #33
0
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module implements base64 encoding-related functionality.
"""

from __future__ import (
    absolute_import, )

from re import (
    compile as _compile, )

from binascii import (
    Error, )

from base64 import (
    b64decode as _b64decode, )

_b64decode_validator = _compile(b'^[A-Za-z0-9-_]*={0,2}$')


def urlsafe_b64decode(s):
    """
    Like ``base64.b64decode`` but with validation.
    """
    if not _b64decode_validator.match(s):
        raise Error('Non-base64 digit found')
    return _b64decode(s, altchars=b"-_")
Exemple #34
0
with open(r'C:\Users\Attack\Desktop\pro.txt','r+') as f:
    pro_name =pickle.loads(f.read())
with open(r'C:\Users\Attack\Desktop\vage.txt','r+') as f:
    vage_name =pickle.loads(f.read())
root_url = 'http://jiage.shucaiyuan.com/'
url_base,data = [],{}
pro_and_vage_name_base = []
for k,v in pro_name.items():
    for i,x in vage_name.items():
        pro_and_vage_name = k+i
        new_url = root_url+x.replace('___','')+v.strip('?').replace('__','_0_0')  # build the request URL
        url_base.append(new_url)
        pro_and_vage_name_base.append(pro_and_vage_name)
res_pro = '<th>(.*)</th>'
res_pri = '<td>(.*)</td>'
res_pro_com = re.compile(res_pro)
res_pri_com = re.compile(res_pri)
month = []
price = []
for i in xrange(100):
    url,name = url_base[i],pro_and_vage_name_base[i]
    soup = BeautifulSoup(urllib2.urlopen(url), "html.parser")  # parse the page at this URL into a BeautifulSoup object
    month = list(re.findall(res_pro_com, str(soup)))  # extract the months as a list
    price = list(re.findall(res_pri_com, str(soup)))  # extract the prices as a list
    price = price[14:26]                 # keep only the price cells
    del month[0]                         # drop the table header
    d = dict(zip(month,price)).copy()
    data[name] = d
with open(r'C:\Users\Attack\Desktop\data.txt','w+') as f:
    f.write(pickle.dumps(data))
end = time.clock()
Exemple #35
0
from constants import BAD_REQUEST, DENIED, NOT_FOUND
from database import query_all, save_to_db
from response_caching import cache
from util import js_time, map_to_list, safe_int, sanitize

from .common import (
    get_ques_by_id,
    get_user_by_id,
    post_level_up_webhook,
    post_incorrect_webhook,
    save_log_to_file_system,
    run_in_thread,
)

pid = "halocrypt"  # getpid()
replace = _compile(r"\s").sub

no_question = lambda idx: {"game_over": True}


def clean_node(a):
    x = a.as_json
    x.pop("secure_data")
    return x


# LEADERBOARD_LIMIT = 100


@cache(lambda: f"{pid}_leaderboard_temp_cache")
def generate_leaderboard():
Exemple #36
0
_cast_cstr = lambda x: _cast(x, _str_).value.decode()

_gen_str_buffers = lambda sz, n: [_BUF_(sz) for _ in range(n)]
_gen_str_buffers_ptrs = lambda bufs: (_pchar_ * len(bufs))(
    *[_cast(b, _pchar_) for b in bufs])

_map_cstr = _partial(map, _cast_cstr)
_map_dt = _partial(map, TOSDB_DateTime)
_zip_cstr_dt = lambda cstr, dt: zip(_map_cstr(cstr), _map_dt(dt))

DLL_BASE_NAME = "tos-databridge"
DLL_DEPENDS1_NAME = "_tos-databridge"
SYS_ARCH_TYPE = "x64" if (_log(_maxsize * 2, 2) > 33) else "x86"
MIN_MARGIN_OF_SAFETY = 10

_REGEX_NON_ALNUM = _compile("[\W+]")
_REGEX_LETTER = _compile("[a-zA-Z]")
_VER_SFFX = '[\d]{1,2}.[\d]{1,2}'
_REGEX_VER_SFFX = _compile('-' + _VER_SFFX + '-')

_REGEX_DLL_NAME = _compile('^(' + DLL_BASE_NAME + '-)' \
                                + _VER_SFFX + '-' \
                                + SYS_ARCH_TYPE + r'(\.dll)$')

_REGEX_DBG_DLL_PATH = _compile('^.+(' + DLL_BASE_NAME + '-)' \
                                      + _VER_SFFX + '-' \
                                      + SYS_ARCH_TYPE + r'_d(\.dll)$')

_dll = None
_dll_depend1 = None
Exemple #37
0
#                  Utility Functions
# ==================================================
from email.utils import parseaddr as _parseaddr
from functools import wraps as _wraps
from http import HTTPStatus
from json import dumps as _dumps
from re import compile as _compile
from time import time
from traceback import print_exc as _print_exc

from flask import Response as _Response
from flask import request as _request
from werkzeug.datastructures import Headers

# maybe only strip whitespace?
_sub = _compile(r"([^\w])").sub
sanitize = lambda x: _sub("", f"{x}").strip().lower()
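
# illustrative behaviour of sanitize (sample value is hypothetical):
#   sanitize("  Hello, World! ")  ->  'helloworld'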


def validate_email_address(email_id: str) -> str:
    if not email_id:
        raise AppException("Invalid email")
    email_id = email_id.lower()
    if "@" in _parseaddr(email_id)[1]:
        return email_id
    raise AppException("Invalid Email", HTTPStatus.BAD_REQUEST)


def get_origin(headers: Headers, or_="*") -> str:
    """
    for CORS requests
Exemple #38
0
class Version:
    """
    Version.

    Args:
        version (str): Version string.
        pre (bool): If True, and no prerelease specified, is always
            lower than any other prerelease when comparing.
    """
    # Semantic version regex
    _RE = _compile(
        # Handle proper "major.minor.patch",
        # but also 'major' or 'major.minor' cases
        r'^(?P<major>0|[1-9]\d*)?'
        r'(?P<minor>\.(0|[1-9]\d*))?'
        r'(?P<patch>\.(0|[1-9]\d*))?'
        # Handle properly formatted prereleases and builds
        r'(?P<prerelease>-(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)'
        r'(\.(0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*)?'
        r'(?P<build>\+[0-9a-zA-Z-]+(\.[0-9a-zA-Z-]+)*)?'
        # Keep extra trailing non-semantic-versioning characters.
        r'(?P<trail>.*)?$')

    # Prerelease and build characters filter
    _FILTER = _compile(r'[^a-zA-Z0-9-.]')

    # Prerelease comparison behavior
    _PRE_COMPARE = {
        # Use an empty value to ensure stable < prerelease
        True: (),
        # Use a late ASCII character to ensure stable > prerelease
        False: ('~', )
    }

    def __init__(self, version, pre=False):
        self._version = parts = {
            key: value
            for key, value in self._RE.match(version).groupdict().items()
            if value
        }

        # Set if this version should be before or after prereleases
        self._pre = pre

        # Get core version number as integers
        for key in ('major', 'minor', 'patch'):
            parts[key] = int(parts.get(key, '0').lstrip('.'))

        # Remove delimiters
        for key in ('prerelease', 'build'):
            try:
                parts[key] = tuple(parts[key][1:].split('.'))
            except KeyError:
                continue

        # Try to handle trailing characters that do not match semantic version
        # as prerelease or build information to allow comparison
        try:
            prerelease = parts.pop('trail')
        except KeyError:
            pass
        else:
            # Get build information if any
            if 'build' not in parts:
                try:
                    prerelease, build = prerelease.split('+', 1)
                except ValueError:
                    pass
                else:
                    self.build = build

            self.prerelease = '.'.join(parts.get('prerelease',
                                                 ())) + prerelease

    def __lt__(self, other):
        return self._compare() < other._compare()

    def __le__(self, other):
        return self._compare() <= other._compare()

    def __eq__(self, other):
        return self._compare() == other._compare()

    def __ge__(self, other):
        return self._compare() >= other._compare()

    def __gt__(self, other):
        return self._compare() > other._compare()

    def __ne__(self, other):
        return self._compare() != other._compare()

    def _compare(self):
        """
        Comparable version.

        Returns:
            tuple: Comparable version.
        """
        ver = self._version
        return (ver['major'], ver['minor'], ver['patch'],
                ver.get('prerelease', self._PRE_COMPARE[self._pre]))

    @property
    def major(self):
        """
        Major version

        Returns:
            int: Major version.
        """
        return self._version['major']

    @major.setter
    def major(self, value):
        """
        Major version

        Args:
            value (int): New value.
        """
        self._version['major'] = int(value)

    @property
    def minor(self):
        """
        Minor version

        Returns:
            int: Minor version.
        """
        return self._version['minor']

    @minor.setter
    def minor(self, value):
        """
        Minor version

        Args:
            value (int): New value.
        """
        self._version['minor'] = int(value)

    @property
    def patch(self):
        """
        Patch version

        Returns:
            int: Patch version.
        """
        return self._version['patch']

    @patch.setter
    def patch(self, value):
        """
        Patch version

        Args:
            value (int): New value.
        """
        self._version['patch'] = int(value)

    @property
    def prerelease(self):
        """
        Prerelease version

        Returns:
            str: Prerelease version.
        """
        return '.'.join(self._version.get('prerelease', ()))

    @prerelease.setter
    def prerelease(self, value):
        """
        Prerelease version

        Args:
            value (str): New value.
        """
        self._version['prerelease'] = tuple(
            element.lstrip('0')
            for element in self._FILTER.sub('', value).strip('-.').split('.'))

    @property
    def build(self):
        """
        Build information.

        Returns:
            str: Build version.
        """
        return '.'.join(self._version.get('build', ()))

    @build.setter
    def build(self, value):
        """
        Build information.

        Args:
            value (str): New value.
        """
        self._version['build'] = tuple(
            self._FILTER.sub('', value).strip('.').split('.'))
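
# A standalone sketch of the _PRE_COMPARE trick above (names and values
# here are illustrative, not from the source): an empty tuple sorts before
# any prerelease tuple, and ('~',) sorts after any ASCII identifier, which
# is exactly what _compare() relies on.
_PRE_COMPARE = {True: (), False: ('~',)}
assert (1, 2, 3, _PRE_COMPARE[True]) < (1, 2, 3, ('alpha',))
assert (1, 2, 3, ('alpha',)) < (1, 2, 3, _PRE_COMPARE[False])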
Exemple #39
0
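    # NOTE: Python 2 snippet -- izip and xrange come from itertools and the
    # py2 builtins; the enclosing class definition is truncated here.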
    def featurise(self, document, sentence, annotation):
        before_ann = sentence.text[:annotation.start].split()
        before_ann.reverse()
        after_ann = sentence.text[annotation.end:].split()

        for i, tok in izip(xrange(1, 4), before_ann):
            for f_tup in self.feature._featurise(tok):
                yield ('-BEFORE-{}-{}'.format(i, f_tup[0]), f_tup[1])
        for i, tok in izip(xrange(1, 4), after_ann):
            for f_tup in self.feature._featurise(tok):
                yield ('-AFTER-{}-{}'.format(i, f_tup[0]), f_tup[1])


from re import compile as _compile
DATE_REGEX = _compile(
    r'^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$')
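
# A quick sanity check of the pattern (dates here are illustrative):
assert DATE_REGEX.match('2011-12-05')
assert not DATE_REGEX.match('2011-13-05')  # month out of range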

#TODO: Window BoW!

### Features to capture NP internal performance


class SpanBoWFeature(object):
    def get_id(self):
        return 'SPAN-BOW'

    def featurise(self, document, sentence, annotation):
        span_text = sentence.annotation_text(annotation)

        for tok in span_text.split():
            yield (tok, 1)
Exemple #40
0
    cat *.malt | ./malt2connlX.py > output.conll

NOTE: Beware of nasty Windows newlines:

    dos2unix *.malt

Author:     Pontus Stenetorp    <pontus stenetorp se>
Version:    2011-12-05
'''

from sys import stdin, stdout
from re import compile as _compile
from codecs import open as _open

### Constants
MALT_REGEX = _compile(ur'^(?P<token>.*?)\t(?P<pos>[^\t]+)\t'
        ur'(?P<head>[^\t]+)\t(?P<rel>[^\t]+)$')
# NOTE: My interpretation from reversing the format by example
OUTPUT_LINE = u'{token_num}\t{token}\t_\t{pos}\t{pos}\t_\t{head}\t{rel}\t_\t_'
###

def main(args):
    token_cnt = 0
    for line in (l.decode('utf-8').rstrip('\n') for l in stdin):
        if not line:
            # Done with the sentence
            token_cnt = 0
            stdout.write('\n')
            continue
        else:
            token_cnt += 1
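            # The conversion step itself is truncated in this example; a
            # sketch of what it plausibly did, given MALT_REGEX and
            # OUTPUT_LINE above (field handling is an assumption):
            #
            #   d = MALT_REGEX.match(line).groupdict()
            #   stdout.write(OUTPUT_LINE.format(token_num=token_cnt,
            #                                   **d).encode('utf-8'))
            #   stdout.write('\n')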
Exemple #41
0
    def _add_or_update_endpoint(self, action, name, version, request_data):
        '''
        Add or update an endpoint
        '''
        self.logger.log(logging.DEBUG, f'Adding/updating model {name}...')

        _name_checker = _compile(r'^[a-zA-Z0-9-_\s]+$')
        if not isinstance(name, str):
            msg = 'Endpoint name must be a string'
            self.logger.log(logging.CRITICAL, msg)
            raise TypeError(msg)

        if not _name_checker.match(name):
            raise gen.Return('endpoint name can only contain: a-z, A-Z, 0-9,'
                             ' underscore, hyphens and spaces.')

        if self.settings.get('add_or_updating_endpoint'):
            msg = ('Another endpoint update is already in progress'
                   ', please wait a while and try again')
            self.logger.log(logging.CRITICAL, msg)
            raise RuntimeError(msg)

        request_uuid = random_uuid()
        self.settings['add_or_updating_endpoint'] = request_uuid
        try:
            description = request_data.get('description')
            if 'docstring' in request_data:
                docstring = str(
                    bytes(request_data['docstring'],
                          "utf-8").decode('unicode_escape'))
            else:
                docstring = None
            endpoint_type = request_data.get('type')
            methods = request_data.get('methods', [])
            dependencies = request_data.get('dependencies')
            target = request_data.get('target')
            schema = request_data.get('schema')

            src_path = request_data.get('src_path')
            target_path = get_query_object_path(
                self.settings[SettingsParameters.StateFilePath], name, version)
            self.logger.log(logging.DEBUG,
                            f'Checking source path {src_path}...')
            _path_checker = _compile(r'^[\\\:a-zA-Z0-9-_~\s/\.]+$')
            # copy from staging
            if src_path:
                if not isinstance(request_data['src_path'], str):
                    raise gen.Return("src_path must be a string.")
                if not _path_checker.match(src_path):
                    raise gen.Return(
                        'Endpoint source path can only contain: a-z, A-Z, '
                        '0-9, underscore, hyphens, spaces and path '
                        'characters (\\ : / . ~).')

                yield self._copy_po_future(src_path, target_path)
            elif endpoint_type != 'alias':
                raise gen.Return("src_path is required to add/update an "
                                 "endpoint.")

            # alias special logic:
            if endpoint_type == 'alias':
                if not target:
                    raise gen.Return('Target is required for alias endpoint.')
                dependencies = [target]

            # update local config
            try:
                if action == 'add':
                    self.tabpy_state.add_endpoint(name=name,
                                                  description=description,
                                                  docstring=docstring,
                                                  endpoint_type=endpoint_type,
                                                  methods=methods,
                                                  dependencies=dependencies,
                                                  target=target,
                                                  schema=schema)
                else:
                    self.tabpy_state.update_endpoint(
                        name=name,
                        description=description,
                        docstring=docstring,
                        endpoint_type=endpoint_type,
                        methods=methods,
                        dependencies=dependencies,
                        target=target,
                        schema=schema,
                        version=version)

            except Exception as e:
                raise gen.Return(f'Error when changing TabPy state: {e}')

            on_state_change(self.settings, self.tabpy_state,
                            self.python_service, self.logger)

        finally:
            self.settings['add_or_updating_endpoint'] = None
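
# Note the lock discipline above: the 'add_or_updating_endpoint' settings
# key doubles as a coarse mutex. It is set before any file copy or state
# mutation and always cleared in the finally block, so a failed update
# cannot leave the server permanently refusing new deployments.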
Exemple #42
0
from re import compile as _compile
from re import search as _search
from typing import AnyStr as _AnyStr
from typing import List as _List
from typing import Pattern as _Pattern
from typing import Tuple as _Tuple
from typing import Union as _Union

# assumed sources for the names used below (the snippet is truncated):
from discord import Colour as _Colour
from discord import Embed as _Embed

from . import miscellaneous as _utils

# ---------- Constants ----------

DEFAULT_EMBED_INLINE: bool = True

MAXIMUM_CHARACTERS: int = 1900
MAXIMUM_CHARACTERS_EMBED_DESCRIPTION: int = 2048

RX_DISCORD_INVITE: _Pattern = _compile(
    r'(?:https?://)?discord(?:(?:app)?\.com/invite|\.gg)/?[a-zA-Z0-9]+/?')

ZERO_WIDTH_SPACE: str = '\u200b'

# ---------- Functions ----------


def convert_color_string_to_embed_color(color_string: str) -> _Colour:
    if color_string:
        split_color_string = color_string.split(',')
        r, g, b = [int(c) for c in split_color_string]
        result = _Colour.from_rgb(r, g, b)
    else:
        result = _Embed.Empty
    return result
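
# A usage sketch (assuming the _Colour/_Embed names resolve to discord.py's
# Colour and Embed, per the imports added above):
#   convert_color_string_to_embed_color('255,0,0')  # -> Colour.from_rgb(255, 0, 0)
#   convert_color_string_to_embed_color('')         # -> Embed.Empty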
Exemple #43
0
    ord_dict = OrderedDict(
        sorted(freq.items(), key=lambda k: k[1], reverse=True)[:num]
    )

    try:
        size = list(range(len(ord_dict)))
        plt.clf()
        plt.bar(size, list(ord_dict.values()), align="center")
        plt.xticks(size, list(ord_dict.keys()))
        plt.autoscale()
        plt.savefig(path, bbox_inches="tight")
    except NameError:
        print(
            "Common n-grams where n = {}:".format(len(list(ord_dict.keys())[0]))
        )
        step = max(ord_dict.values()) / len(ord_dict)
        for i in ord_dict:
            bars = "▇" * int(ord_dict[i] / step)
            print("{}: {} ({})".format(i, bars, ord_dict[i]))


if __name__ == "__main__":
    if len(argv) != 2:
        raise SystemExit("Invalid number of parameters!")
    with open(argv[1]) as raw:
        TEXT = "".join(i.lower().replace("\n", " ") for i in raw.readlines())
        SIMPLE = _compile("[^a-z ]+").sub("", TEXT)

        for n, m in zip([1, 2, 3], [26, 20, 15]):
            show_output(ngram_frequency(n, SIMPLE), m, "n{}.png".format(n))
Exemple #44
0
from re import compile as _compile

from .decorators import parser as _parser, spliter as _spliter
from .parse import parse as _parse

ALIAS_SPLIT = _compile(r"[\[./\\\]>]")
CAMEL_CASE_SPACE1 = _compile(r"([A-Z])([A-Z][a-z]+)")
CAMEL_CASE_SPACE2 = _compile(r"([a-z\d])([A-Z])")
CAPITALS = _compile(r"([A-Z])([A-Z])")
CAPITAL_LOWER = _compile(r"([A-Z])([a-z])")
INVALID_CHARS = _compile(r"\W")
INVALID_LEAD = _compile(r"^[^a-zA-Z]+")
REPLACEABLE_WITH_UNDERSCORE = _compile(r"[\s/,.+-]+")
SPACE = _compile(r" ")
UNDERSCORE = _compile(r"_")
UNDERSCORES = _compile(r"(_)\1+")


@_spliter
def alias2keys(alias):
    return [
        _parse(key, errors=False)
        for key in filter(None, ALIAS_SPLIT.split(alias))
    ]


def _base_case(string):
    string = REPLACEABLE_WITH_UNDERSCORE.sub(r"_", string)
    string = INVALID_CHARS.sub(r"", string)
    string = CAMEL_CASE_SPACE1.sub(r"\1_\2", string)
    return CAMEL_CASE_SPACE2.sub(r"\1_\2", string)
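

# A worked example of the two camel-case passes (input is illustrative):
# CAMEL_CASE_SPACE1 splits the acronym boundary ("HTTP_ResponseCode"),
# then CAMEL_CASE_SPACE2 splits the lower-to-upper boundary:
assert _base_case("HTTPResponseCode") == "HTTP_Response_Code"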
Exemple #45
0
    def _add_or_update_endpoint(self, action, name, version, request_data):
        '''
        Add or update an endpoint
        '''
        _name_checker = _compile(r'^[a-zA-Z0-9-_\ ]+$')
        if not isinstance(name, basestring):
            raise TypeError("Endpoint name must be a string or unicode")

        if not _name_checker.match(name):
            raise gen.Return('endpoint name can only contain: a-z, A-Z, 0-9,'
                             ' underscore, hyphens and spaces.')

        if self.settings.get('add_or_updating_endpoint'):
            raise RuntimeError(
                "Another endpoint update is already in progress, "
                "please wait a while and try again")

        request_uuid = random_uuid()
        self.settings['add_or_updating_endpoint'] = request_uuid
        try:
            description = request_data.get('description')
            docstring = request_data.get('docstring')
            endpoint_type = request_data.get('type')
            methods = request_data.get('methods', [])
            dependencies = request_data.get('dependencies')
            target = request_data.get('target')
            schema = request_data.get('schema')

            src_path = request_data.get('src_path')
            target_path = get_query_object_path(
                self.settings['state_file_path'], name, version)
            # raw string: otherwise '\\a' becomes a BEL escape in the class
            _path_checker = _compile(r'^[\\a-zA-Z0-9-_\ /]+$')
            # copy from staging
            if src_path:
                if not isinstance(request_data['src_path'], basestring):
                    raise gen.Return("src_path must be a string.")
                if not _path_checker.match(src_path):
                    raise gen.Return(
                        'Endpoint source path can only contain: a-z, A-Z, '
                        '0-9, underscore, hyphens and spaces.')

                yield self._copy_po_future(src_path, target_path)
            elif endpoint_type != 'alias':
                raise gen.Return(
                    "src_path is required to add/update an endpoint.")

            # alias special logic:
            if endpoint_type == 'alias':
                if not target:
                    raise gen.Return('Target is required for alias endpoint.')
                dependencies = [target]

            # update local config
            try:
                if action == 'add':
                    self.tabpy.add_endpoint(name=name,
                                            description=description,
                                            docstring=docstring,
                                            endpoint_type=endpoint_type,
                                            methods=methods,
                                            dependencies=dependencies,
                                            target=target,
                                            schema=schema)
                else:
                    self.tabpy.update_endpoint(name=name,
                                               description=description,
                                               docstring=docstring,
                                               endpoint_type=endpoint_type,
                                               methods=methods,
                                               dependencies=dependencies,
                                               target=target,
                                               schema=schema,
                                               version=version)

            except Exception as e:
                raise gen.Return("Error when changing TabPy state: %s" % e)

            on_state_change(self.settings)

        finally:
            self.settings['add_or_updating_endpoint'] = None
Exemple #46
0
    RESTServiceClient as _RESTServiceClient,
    Endpoint as _Endpoint,
    AliasEndpoint as _AliasEndpoint,
)

from .custom_query_object import CustomQueryObject as \
    _CustomQueryObject

import os as _os


import logging as _logging
_logger = _logging.getLogger(__name__)


_name_checker = _compile(r'^[a-zA-Z0-9-_\ ]+$')

if sys.version_info.major == 3:
    unicode = str


def _check_endpoint_name(name):
    """Checks that the endpoint name is valid by comparing it with an RE and
    checking that it is not reserved."""
    if not isinstance(name, (str, unicode)):
        raise TypeError("Endpoint name must be a string or unicode")

    if name == '':
        raise ValueError("Endpoint name cannot be empty")

    if not _name_checker.match(name):
"""
This module provides functionality required for managing endpoint objects in
TabPy. It provides a way to download endpoint files from a remote location
and then properly clean up the local endpoint files on update/remove of
endpoint objects.

The local temporary files for TabPy will by default be located at
    /tmp/query_objects

"""
import logging
import os
import shutil
from re import compile as _compile

_name_checker = _compile(r"^[a-zA-Z0-9-_\s]+$")


def _check_endpoint_name(name, logger=logging.getLogger(__name__)):
    """Checks that the endpoint name is valid by comparing it with an RE and
    checking that it is not reserved."""
    if not isinstance(name, str):
        msg = "Endpoint name must be a string"
        logger.log(logging.CRITICAL, msg)
        raise TypeError(msg)

    if name == "":
        msg = "Endpoint name cannot be empty"
        logger.log(logging.CRITICAL, msg)
        raise ValueError(msg)
Exemple #48
0
# assumed imports, missing from the truncated snippet:
from os.path import dirname, join as join_path
from re import compile as _compile

from resources import Annotation, Document, Sentence

### Constants
TXT_EXT = '.txt'
SS_EXT = '.ss'
A1_EXT = '.a1'
A2_EXT = '.a2'
RES_EXTS = set((TXT_EXT, SS_EXT, A1_EXT, A2_EXT))
# We use this mapping for sorting
PRIO_BY_RES_EXT = {
        TXT_EXT: 0,
        SS_EXT: 10,
        A1_EXT: 20,
        A2_EXT: 30,
        }
TB_SO_REGEX = _compile((r'^T[0-9]+\t(?P<type>[^ ]+) (?P<start>\d+) '
    r'(?P<end>\d+)(?:\t(?P<text>.*?))?$'))
# TODO: data dir could be referred elsewhere
DATA_DIR = join_path(dirname(__file__), '../data/corpora')
BIONLP_2011_DIR = join_path(DATA_DIR, 'bionlp_2011_st')
BIONLP_2009_DIR = join_path(DATA_DIR, 'bionlp_2009_st')
GREC_DIR = join_path(DATA_DIR, 'grec')
CALBC_CII_DIR = join_path(DATA_DIR,
        'calbc_ii_st_format_500_sample')
NLPBA_DIR = join_path(DATA_DIR,
        'nlpba_slightly_wrong')
NLPBA_DOWN_DIR = join_path(DATA_DIR,
        'nlpba_slightly_wrong_downsized')
#        'CALBC.CII.75k.cos98.3.EBI.CL.20101008_st_format')
# Epi and PTM data
BIONLP_2011_EPI_TRAIN_DIR = join_path(BIONLP_2011_DIR,
        'BioNLP-ST_2011_Epi_and_PTM_training_data')
Exemple #49
0
from re import IGNORECASE, compile as _compile  # assumed: used below, missing from the truncated snippet
from server.util import ParsedRequest as _Parsed
from server.util import get_bearer_token, json_response

from .common import (
    add_to_db,
    clean_secure,
    get_user_by_id,
    save_to_db,
    send_acount_creation_webhook,
    send_admin_action_webhook,
)
from .cred_manager import CredManager

# regex to find the offending column
# there must be a better way - RH
find_error = _compile(r"Key\s*\(\"?(?P<key>.*?)\"?\)=\((?P<val>.*?)\)",
                      IGNORECASE).search


def get_integrity_error_cause(error_message: str):
    try:
        match = find_error(error_message)
        print(error_message)
        if not match:
            return None
        k = match.group("key")
        v = match.group("val")
        return k, v
    except Exception as e:
        print(e)
        return None
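
# A quick check against a typical PostgreSQL duplicate-key message (the
# message text here is illustrative):
#   get_integrity_error_cause('DETAIL:  Key ("email")=(a@b.c) already exists.')
#   -> ('email', 'a@b.c')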
Exemple #50
0
                   c_void_p as _pvoid_, \
                   c_uint as _uint_, \
                   c_uint32 as _uint32_, \
                   c_uint8 as _uint8_
                   

_pchar_ = _PTR_(_char_)
_ppchar_ = _PTR_(_pchar_)  
_cast_cstr = lambda x: _cast(x,_str_).value.decode()

DLL_BASE_NAME = "tos-databridge"
DLL_DEPENDS1_NAME = "_tos-databridge"
SYS_ARCH_TYPE = "x64" if (_log(_maxsize * 2, 2) > 33) else "x86"
MIN_MARGIN_OF_SAFETY = 10

_REGEX_NON_ALNUM = _compile(r"[\W+]")
_REGEX_LETTER = _compile(r"[a-zA-Z]")
_VER_SFFX = r'[\d]{1,2}\.[\d]{1,2}'
_REGEX_VER_SFFX = _compile('-' + _VER_SFFX + '-')

_REGEX_DLL_NAME = _compile('^(' + DLL_BASE_NAME + '-)'
                           + _VER_SFFX + '-'
                           + SYS_ARCH_TYPE + r'(\.dll)$')
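
# On a 64-bit interpreter (SYS_ARCH_TYPE == "x64") the pattern accepts
# names such as "tos-databridge-0.8-x64.dll" (version digits are
# illustrative):
#   _REGEX_DLL_NAME.match("tos-databridge-0.8-x64.dll")  # -> match object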
           
_dll = None
_dll_depend1 = None

      
def init(dllpath=None, root="C:\\", bypass_check=False):
    """ Initialize the underlying tos-databridge DLL
Exemple #51
0
def compile(rule):
	try:
		return _compile(rule)
	except Exception as e:
		raise Exception(str(e) + ': ' + rule)
Exemple #52
0
        return 'WINDOW-FORMATTING-STRING'
    
    def featurise(self, document, sentence, annotation):
        before_ann = sentence.text[:annotation.start].split()
        before_ann.reverse()
        after_ann = sentence.text[annotation.end:].split()

        for i, tok in izip(xrange(1, 4), before_ann):
            for f_tup in self.feature._featurise(tok):
                yield ('-BEFORE-{}-{}'.format(i, f_tup[0]), f_tup[1])
        for i, tok in izip(xrange(1, 4), after_ann):
            for f_tup in self.feature._featurise(tok):
                yield ('-AFTER-{}-{}'.format(i, f_tup[0]), f_tup[1])

from re import compile as _compile
DATE_REGEX = _compile(r'^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$')

#TODO: Window BoW!

### Features to capture NP internal performance

class SpanBoWFeature(object):
    def get_id(self):
        return 'SPAN-BOW'

    def featurise(self, document, sentence, annotation):
        span_text = sentence.annotation_text(annotation)

        for tok in span_text.split():
            yield (tok, 1)
Exemple #53
0
from re import compile as _compile  # assumed: needed for the matchers below


class Simplifier(object):

    pre_operations = [
        lambda x: x.replace("parent's partner", "parent"),
        lambda x: x.replace("partner's child", "child"),
        # lambda x: x.replace("parent's child", ""),
        lambda x: x.replace("child's parent", ""),
        lambda x: x.replace(" 's", ""),
        lambda x: x.replace("  ", " "),
        lambda x: x if not x.startswith("'s") else x[2:],
        lambda x: x.strip(),
    ]
    operations = [
        lambda x: x.replace("parent's sibling", "aunt/uncle"),
        lambda x: x.replace("aunt/uncle's child", "cousin"),
        lambda x: x.replace("parent's child", "sibling"),
        lambda x: x.replace("sibling's child", "niece/nephew"),
        lambda x: x.replace("sibling's partner's child", "niece/nephew"),
        lambda x: x.replace("parent's niece/nephew", "cousin"),
        lambda x: x.replace("aunt/uncle's child", "cousin"),
        lambda x: x.replace("niece/nephew's sibling", "niece/nephew"),
        lambda x: x.replace("niece/nephew's child", "grandniece/nephew").
        replace("grandgrandniece/nephew", "great grandniece/nephew"),
    ]
    short_operations = [
        lambda x: Simplifier.relation_simplify_simple(x, "child"),
        lambda x: Simplifier.relation_simplify_simple(x, "parent"),
        # lambda x: Simplifier.relation_simplify_simple(x, "niece/nephew"),
        # lambda x: Simplifier.get_cousin_string(x),
        lambda x: x.replace("grandsibling", "great aunt/uncle"),
        lambda x: Simplifier.sibling_cousin_remover(x),
    ]
    post_operations = [
        lambda x: x.replace(" 's", ""),
        lambda x: x.replace("  ", " "),
        lambda x: x if not x.startswith("'s") else x[2:],
        lambda x: x.strip(),
    ]
    # cousin_matcher = _compile(r"(((great )*?)(grand)?(parent)'s )?(cousin)('s)? ?((((great )*? ?(grand)?)child('s)?)*)")  # magic
    cousin_matcher = _compile(
        r"(parent('s)?) (((parent('s)?)|(child('s)?)) ?)+($|(partner))")
    sibling_cousin_matcher = _compile(
        r"sibling's \d+((st)|(nd)|(rd)|(th)) cousin")
    nephew_child_matcher = _compile(r"(niece\/nephew's )((child('s )?)+)")

    @staticmethod
    def relation_simplify_simple(string: str, search_string: str) -> str:
        '''
        Simplifies down a range of "child's child's child's..." to one set of "[great...] grandchild

        Params:
            string: str
                The string to be searched and modified
            search_string: str
                The name to be searched for and expanded upon
        '''

        # Split it to be able to iterate through
        split = string.strip().split(' ')
        new_string = ''
        counter = 0
        for i in split:
            if i in [f"{search_string}'s", search_string]:
                counter += 1
            elif counter == 1:
                new_string += f"{search_string}'s {i} "
                counter = 0
            elif counter == 2:
                new_string += f"grand{search_string}'s {i} "
                counter = 0
            elif counter > 2:
                new_string += f"{'great ' * (counter - 2)}grand{search_string}'s {i} "
                counter = 0
            else:
                new_string += i + ' '

        # And repeat again for outside of the loop
        if counter == 1:
            new_string += f"{search_string}'s "
            counter = 0
        elif counter == 2:
            new_string += f"grand{search_string}'s "
            counter = 0
        elif counter > 2:
            new_string += f"{'great ' * (counter - 2)}grand{search_string}'s"
            counter = 0

        # Return new string
        new_string = new_string.strip()
        if new_string.endswith("'s"):
            return new_string[:-2]
        return new_string
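
    # A worked example (input is illustrative): three "child" hops collapse
    # to a single term, with one "great" per generation past grandchild:
    #   relation_simplify_simple("child's child's child", "child")
    #   -> "great grandchild"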

    @classmethod
    def relation_simplify_nephew(cls, string: str) -> str:
        '''
        Simplifies down a range of "niece/nephew's child's childs..." to one set of "[great...] grandniece/nephew

        Params:
            string: str
                The string to be searched and modified
        '''

        # k = cls.nephew_child_matcher.search(string)
        # great_count = k.group(2).count(' ')
        # if 'child' in k.group(2):
        #     span = k.span()
        #     return string[:span[0]] + f"{'great ' * great_count}grandniece/nephew" + string[:span[1]]
        return string

    @staticmethod
    def get_cousin_parent_count(k):
        '''Gets the amount of generations UP the cousin count goes'''

        p = 0
        if k.group(3):
            # greats
            p += k.group(3).strip().count(' ')
        if k.group(4):
            # grand
            p += 1
        if k.group(5):
            # parent
            p += 1
        return p

    @staticmethod
    def get_cousin_child_count(k):
        '''Gets the amount of generations DOWN the cousin count goes'''

        # group 5 is cousin, so we get an extra space
        # group 7 is [child's child's...]
        # group 12 is GRAND-child
        return ((k.group(6) + k.group(8)).strip().count(' ')
                + (1 if k.group(12) else 0))

    @classmethod
    def get_cousin_string(cls, string: str):
        '''Gets the full cousin string'''

        k = cls.cousin_matcher.search(string)
        if not k:
            return string
        # if k.group(0).startswith("parent's child"):
        #     span = k.span()
        #     return string[:span[0]] + " sibling's " + string[span[1]:]

        p = k.group(0).count('parent')  # p = cls.get_cousin_parent_count(k)
        c = k.group(0).count('child')  # c = cls.get_cousin_child_count(k)

        if p < 2:
            # Make sure we're not just working on nieces/children/siblings
            return string
        if c == 1:
            # This is a variation on aunt/uncle
            if p <= 2:
                return (string[:k.span()[0]] + "aunt/uncle"
                        + string[k.span()[1]:])
            return (string[:k.span()[0]]
                    + f"{'great ' * (p - 3)} grand aunt/uncle"
                    + string[k.span()[1]:])

        p -= 2
        c -= 2
        x = c + 1 if (c + 1) < p + 1 else p + 1  # nth cousin
        y = abs(p - c)  # y times removed

        if x < 1:
            return string
        if x == 1 and y == 0:
            return string[:k.span()[0]] + "cousin" + string[k.span()[1]:]
        cousin_string = ""
        if str(x).endswith('1') and x != 11:
            cousin_string += f"{x}st cousin "
        elif str(x).endswith('2') and x != 12:
            cousin_string += f"{x}nd cousin "
        elif str(x).endswith('3') and x != 13:
            cousin_string += f"{x}rd cousin "
        else:
            cousin_string += f"{x}th cousin "
        if y == 0:
            return (string[:k.span()[0]] + cousin_string.strip()
                    + string[k.span()[1]:])
        removed = "1 time removed" if y == 1 else f"{y} times removed"
        return (string[:k.span()[0]] + (cousin_string + removed).strip()
                + string[k.span()[1]:])

    @classmethod
    def sibling_cousin_remover(cls, string: str):
        '''Removes "sibling's nth cousin" to "nth cousin"'''

        k = cls.sibling_cousin_matcher.search(string)
        if not k:
            return string
        span = k.span()
        return (string[:span[0]]
                + k.group(0).replace("sibling's ", "")
                + string[span[1]:])

    @classmethod
    def simplify(cls, string: str):
        '''Runs the given input through the shortening operations
        a number of times so as to shorten the input to a nice
        family relationship string'''

        before = string
        for i in range(10):
            for o in cls.pre_operations:
                string = o(string)
                if string == before:
                    continue
                else:
                    before = string
        for i in range(5):
            string = cls.get_cousin_string(string)
            if string == before:
                continue
            else:
                before = string
        for i in range(10):
            for o in cls.operations:
                string = o(string)
                if string == before:
                    continue
                else:
                    before = string
        for i in range(10):
            for o in cls.short_operations:
                string = o(string)
                if string == before:
                    continue
                else:
                    before = string
        for i in range(10):
            for o in cls.post_operations:
                string = o(string)
                if string == before:
                    continue
                else:
                    before = string
        for i in range(10):
            for o in cls.short_operations:
                string = o(string)
                if string == before:
                    continue
                else:
                    before = string
        return string
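
# A minimal end-to-end sketch (input is hypothetical; relies only on the
# class above): Simplifier.simplify("parent's sibling's child") first
# rewrites "parent's sibling" -> "aunt/uncle", then "aunt/uncle's child"
# -> "cousin", returning "cousin".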
Exemple #54
0
from pathlib import Path
from textwrap import dedent
from re import compile as _compile
from typing import Dict  # assumed: read_data() below is annotated with Dict
import json

import requests
from jinja2 import Template

try:
    from packaging.version import parse
except ImportError:
    from pip._vendor.packaging.version import parse

# -- Code ----------------------------------------------------------------------

DUNDER_REGEXP = _compile(r'(__(.*?)__ = "(.*?)")\n')


def read_data() -> Dict[str, str]:
    """ Read data from __versions__ py """

    init = Path(".").parent / "deadlinks" / "__version__.py"

    if not Path(init).is_file():
        raise RuntimeError("Can not find source for deadlinks/__version__.py")

    values = dict() # type: Dict[str, str]
    with open(str(init)) as fh:
        content = "".join(fh.readlines())
        for match in DUNDER_REGEXP.findall(content):
            values[match[1]] = match[2]
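
# The dunder pattern in isolation (file content is illustrative):
#   DUNDER_REGEXP.findall('__version__ = "1.2.3"\n')
#   -> [('__version__ = "1.2.3"', 'version', '1.2.3')]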
Exemple #55
0
#TODO: Lehvenstein tolerance, warn if within
#TODO: No-warn
#TODO: Print filename if mismatch flag

# assumed imports, missing from the truncated snippet:
from argparse import ArgumentParser
from re import compile as _compile

### Constants
ARGPARSER = ArgumentParser(description=('reads paths to PTB files on stdin '
        'adds a suffix to the path and compares the resulting files '
        'for consistency'))
ARGPARSER.add_argument('dep_format', choices=['conll', 'sd'],
        help='format of the output to sanity check')
# It is a bit dirty to assume suffixing, but it makes thing a little easier
ARGPARSER.add_argument('--dep_suffix', default=None, help=('suffix to add to '
    'the input file path to get the path to the resulting dependency file '
    '(default: dep_format value)'))

PTB_TOK_REGEX = _compile(r'\([^(]+?\ ([^(]+?)\)')
PTB_ESCAPE_MAP = { 
        '-LRB-': '(',
        '-RRB-': ')',
        '-LSB-': '[',
        '-RSB-': ']',
        '-LCB-': '{',
        '-RCB-': '}',
        #Apparently at least the CoNLL converter don't want this done
        #'``': '"',
        }
###

#XXX: We preserve escapes inside tokens because, well, others do it
def _ptb_unescape(tok):
    if tok in PTB_ESCAPE_MAP:

Exemple #56
0
# assumed imports, missing from the truncated snippet:
from logging import getLogger as _getLogger
from re import compile as _compile

from graphlab.connect.aws._ec2 import get_credentials as _get_credentials
import graphlab as _gl
import graphlab.connect as _mt

# since _predictive_service_environment imports these, need to have them defined first
_MAX_CREATE_TIMEOUT_SECS = 600 # 10m

from _predictive_service._predictive_service_environment import Ec2PredictiveServiceEnvironment as _Ec2PredictiveServiceEnvironment
from _predictive_service._predictive_service_environment import LocalPredictiveServiceEnvironment as _LocalPredictiveServiceEnvironment
from _predictive_service._file_util import parse_s3_path as _parse_s3_path, s3_recursive_delete as _s3_recursive_delete, s3_delete_key as _s3_delete_key
from _predictive_service._predictive_service import PredictiveService as _PredictiveService

_logger = _getLogger(__name__)
_name_checker = _compile('^[a-zA-Z-]+$')

def create(name, environment, state_path, description=None, api_key=None,
           admin_key=None, ssl_credentials=None):
    '''
    Launch a Predictive Services cluster. This cluster can currently be launched
    on EC2 by specifying an EC2 environment.

    Parameters
    ----------
    name : str
        The name of the Predictive Service that will be launched.

        This string can only contain: a-z, A-Z and hyphens.

    environment : :class:`~graphlab.deploy.environment.EC2` or str
Exemple #57
0
def compile(rule):
    try:
        return _compile(rule)
    except Exception as e:
        raise Exception(str(e) + ': ' + rule)