Ejemplo n.º 1
0
 def ls(self, path: str, patt: str = "*", **kw) -> 'L':
     """List the children of the directory node stored at *path*.

     A node is a listable directory only when it is a dict flagged with
     ``'__dir__': True``; anything else yields ``NONE``.

     :param path: Key of the node inside ``self._value``.
     :param patt: fnmatch-style pattern children must match.
     :param kw: ``dir`` (default True) — include sub-directories as
         ``Namespace`` entries; ``type`` — a tuple of types to filter
         plain values by.
     :return: An ``L`` of the matching entries (its ``name`` set to
         *path*), or ``NONE`` when *path* is missing or not a directory.
     """
     node = self._value.get(path, None)
     if node is None or node is NONE:
         return NONE
     if isinstance(node, Monad):
         node = node.value
     # Only dicts carrying the '__dir__' marker are directories.
     if not (isinstance(node, dict) and node.get('__dir__', False) is True):
         return NONE
     wanted = kw.get("type", None)
     list_dirs = kw.get('dir', True) is True
     _out = []
     for key, child in node.items():
         if key.startswith("__"):
             # Metadata entries ('__dir__', '__name__', ...) are hidden.
             continue
         if not fnmatch(key, patt):
             continue
         if isinstance(child, dict) and child.get('__dir__', False) is True \
                 and list_dirs:
             ns = Namespace(child)
             self._clone(self, ns, "stor")
             _out.append(ns)
         elif wanted is not None and isinstance(wanted, tuple):
             # A tuple 'type' filter keeps only matching plain values.
             if isinstance(child, wanted):
                 _out.append(Just(child))
         else:
             _out.append(Just(child))
     res = L(_out)
     res.name = path
     # FIX: previously returned a fresh `L(_out)`, discarding the
     # `.name` attribute that was just assigned on `res`.
     return res
Ejemplo n.º 2
0
    def run(self, connections):
        '''
        Run the jobs one after another, committing between each of them,
        with optional setup/teardown statements around the whole sequence.

        :param connections: Two-connection tuple to know where to pull the data from and here to push it back to
        :return:  Just(connections) if everything went well else Nothing()
        '''
        source_cnx, target_cnx = connections

        source_cursor = source_cnx.cursor()
        target_cursor = target_cnx.cursor()

        # Seed of the monadic chain: run the setup statement first when
        # there is one, otherwise start straight from the two cursors.
        if self.setup:
            seed = query(target_cursor, self.setup).map(
                lambda cur: (source_cursor, cur))
        else:
            seed = Just((source_cursor, target_cursor))

        def chain(acc, job):
            '''
            Commit the work done so far, then bind the next job.

            :param acc: Accumulated result
            :param job: New job
            :return: Just(cursors) or Nothing() if job failed
            '''
            target_cnx.commit()
            return acc | job

        outcome = reduce(chain, self.jobs, seed)
        after_teardown = outcome | (
            lambda _: query(target_cursor, self.teardown) if self.teardown else Just(_))
        return after_teardown | (lambda _: Just(connections))
Ejemplo n.º 3
0
    def V(self, path: str, value: Monad = NONE) -> Any:
        """Read or write the value stored at *path*.

        With no value supplied the call is a read — the backing store
        (``self.stor``) wins over the in-memory namespace.  Otherwise the
        call is a write and the given *value* is returned unchanged.

        :param path: Absolute path, or a bare name rooted under ``/home``.
        :param value: Monad (or raw value) to store; ``NONE``/``None``/
            ``Nothing`` mean "read".
        :return: ``Just(result)``/``NONE``/namespace result on read, the
            given *value* on write.
        """
        # Relative names live under /home.
        if path[0] != "/":
            path = f"/home/{path}"

        # FIX: the original tested `value is Nothing()` — an identity
        # comparison against a freshly constructed instance, which can
        # never be true; isinstance expresses the intended check.
        # (Also dropped an unused `import os.path`.)
        if value is NONE or value is None or isinstance(value, Nothing):
            if self.stor.here(path) is True:
                res = self.stor.get(path)
                if res is None:
                    return NONE
                else:
                    return Just(res)
            return self._value.get(path)
        else:
            if isinstance(value, Monad) is True:
                data = value.value
            else:
                data = value
            if self.stor.here(path) is True:
                self.stor.set(path, data)
            else:
                # NOTE(review): the store receives the unwrapped `data`
                # but the namespace receives the original wrapped `value`
                # — looks inconsistent; confirm before unifying.
                self._value.set(path, value)
            return value
        # (a former trailing `return NONE` was unreachable and removed)
Ejemplo n.º 4
0
    def run(self, from_, to):
        '''
        Run the pipelines sequentially as one monadic chain.
        Commit on success, report failure otherwise, and always close
        both connections at the end (rolling back anything uncommitted).

        :param from_: Settings of the source connection
        :param to: Settings of the target connection
        :return:
        '''
        source = connection(from_)
        target = connection(to)

        try:
            # Fold the (verbose-wrapped) pipelines over the starting pair
            # of connections; `|` short-circuits on the first Nothing().
            outcome = Just((source, target))
            for pipeline in map(verbose, self.pipelines):
                outcome = outcome | pipeline

            if outcome.is_just:
                target.commit()
                print("Migration complete.")
            else:
                print("Migration failed.")
        except Exception as e:
            print(e)
        finally:
            print('Closing all')
            target.rollback()
            target.close()
            source.close()
Ejemplo n.º 5
0
def query(cursor, statement, params=None):
    '''
    Execute a SQL statement using the given cursor.

    A list/tuple of parameter sets is executed with ``executemany`` in
    batches of 10K to avoid transaction rejection.  A dict (one set of
    named parameters) or any other non-None value goes to a plain
    ``execute``.

    :param cursor: SQL cursor
    :param statement: Statement to execute
    :param params: SQL query parameters
    :return: Just(cursor) or Nothing() if query failed
    '''
    try:
        # FIX: dicts were previously routed into this batching branch,
        # but a dict cannot be sliced (`params[i:i + step]` raises
        # TypeError); a dict is a single named-parameter set and belongs
        # in the plain-execute branch below.
        if isinstance(params, (list, tuple)):
            step = 10000
            for i in range(0, len(params), step):
                cursor.executemany(statement, params[i:i + step])
        elif params is not None:
            # `is not None` rather than truthiness, so an empty dict of
            # named parameters still takes the parameterized path.
            cursor.execute(statement, params)
        else:
            cursor.execute(statement)
        return Just(cursor)
    except mysql.connector.Error as e:
        print(e)
        cursor.close()
        return Nothing()
Ejemplo n.º 6
0
    def transform(_, telecoms):
        """Replace each telecom's value with a synthetic number."""
        def scrub_number(row):
            row_id, system, value, *tail = row
            return (row_id, system, synthetic_number(value), *tail)

        return Just([scrub_number(row) for row in telecoms])
Ejemplo n.º 7
0
    def transform(_, networks):
        # Factory producing per-state synthetic network names.
        name_synthesizer = synthetic_network_name_generator

        # Group rows on their last column (presumably the state — TODO
        # confirm) and append each row's index within its group.  NOTE:
        # itertools.groupby only groups *adjacent* rows, so `networks` is
        # assumed to arrive already sorted by that column — verify caller.
        by_state = (_[1] + (_[0],) for group in groupby(networks, lambda n: n[-1]) for _ in enumerate(group[1]))
        # Replace the last four columns with two synthesized names built
        # from trailing fields; exact column meanings are not visible
        # here — confirm against the extract statement.
        with_name = (_[:-4] + (name_synthesizer(_[-3], _[-2], _[-1]), name_synthesizer(_[-4], _[-2], _[-1])) for _ in
                     by_state)

        # Lazy generator wrapped in Just; materialized downstream.
        return Just(with_name)
Ejemplo n.º 8
0
    def transform(_, plans):
        """Give every insurance plan a synthetic, index-based name."""
        synthesize = synthetic_insurance_plan_name_generator()

        def scrub_plan(indexed_record):
            position, record = indexed_record
            ip_id, md_id, status, name, *tail = record
            return (ip_id, md_id, status, synthesize(position, name), *tail)

        return Just(map(scrub_plan, enumerate(plans)))
Ejemplo n.º 9
0
    def transform(_, addresses):
        """Swap each address's first line for a synthetic one."""
        synthesize_line = synthetic_address_line_generator()

        def scrub_line1(row):
            address_id, use, kind, text, line1, *tail = row
            return (address_id, use, kind, text, synthesize_line(line1),
                    *tail)

        return Just(map(scrub_line1, addresses))
Ejemplo n.º 10
0
    def run(connections):
        """Copy the table across databases via a one-job SQL pipeline."""
        source, destination = connections

        copy_stmt = 'INSERT INTO {2}.{0} SELECT * FROM {1}.{0};'.format(
            table, source.database, destination.database)
        # Empty extract statement and identity transform: the INSERT ...
        # SELECT does all the work on the destination side.
        job = SQLJob('', lambda cursor, params: Just(None), copy_stmt)
        return SQLPipeline(job, setup=setup,
                           teardown=teardown).run(connections)
Ejemplo n.º 11
0
    def transform(_, names):
        """Replace family and given names with synthetic ones."""
        scrub_first = synthetic_first_name_generator()
        scrub_last = synthetic_last_name_generator()

        def scrub_name(row):
            name_id, use, text, family, given, *tail = row
            return (name_id, use, text, scrub_last(family),
                    scrub_first(given), *tail)

        return Just(map(scrub_name, names))
Ejemplo n.º 12
0
    def transform(_, aliases):
        """Synthesize organization alias values, dropping the taxonomy
        column from the output rows."""
        synthesize = synthetic_org_name_generator()

        def scrub(row):
            taxonomies, alias_id, period_start, period_end, value, *tail = row
            taxonomy_args = taxonomies.split(',') if taxonomies else []
            return (alias_id, period_start, period_end,
                    synthesize(value, *taxonomy_args), *tail)

        return Just(map(scrub, aliases))
Ejemplo n.º 13
0
def identity_transformer(entity_id, to_cursor, records):
    '''
    No-op transformer for :func:migrate_identifier — hand the identifier
    records back untouched.

    :param entity_id: Ignored
    :param to_cursor: Ignored
    :param records: Records for which to synthesize identifier values
    :return: Just(records)
    '''
    return Just(records)
Ejemplo n.º 14
0
    def __call__(self, cursors):
        '''
        1. Extract executing extract_stmt using the first cursor
        2. Load results into memory. We might not do it here if we're able to highly rely on the network connection.
           i.e. for very big job the transaction would need to stay open all the way long if we don't buffer the result.
        3. Transform using the given transformer
        4. Load executing load_stmt using the second cursor and the transformed records

        :param cursors: Two-tuple of SQL cursors to execute the given statements
        :return: Just(cursors) or Nothing() in case of failure
        '''
        from_cursor, to_cursor = cursors

        # Monadic pipeline: each `|` binds the next step and
        # short-circuits to Nothing() as soon as one step fails.
        # `tuple(fc)` buffers all extracted rows in memory (step 2).
        # NOTE(review): `tuple(ts) if ts else ts` — a generator is always
        # truthy, so generator results are always materialized; only an
        # already-empty list/tuple is passed through as-is. Confirm that
        # is the intended behavior for empty transform results.
        return query(from_cursor, self.extract_stmt) \
               | (lambda fc: self.transform(to_cursor, tuple(fc))) \
               | (lambda ts: query(to_cursor, self.load_stmt, tuple(ts) if ts else ts)) \
               | (lambda tc: Just((from_cursor, tc)))
Ejemplo n.º 15
0
def tokenize(acronyms, name):
    """
    Break an organization name down into a tuple of tokens.

    Steps are:
    1. Remove unwanted punctuation (for instance we don't mind parenthesize or brackets or some other punctuation)
    2. Break the name down into a list of tokens
    3. Check if tokens that contain a dot are actually some known acronym or if we should simply split it in two tokens

    :param acronyms: A collection of known acronyms. Used to identify acronyms ignoring the dots
    (i.e. M.D = M.D. = MD. = MD)
    :param name: The organization name
    :return: A collection of tokens as a tuple. Must be a tuple as its used as a dict key for caching purpose.
    https://wiki.python.org/moin/DictionaryKeys
    """
    cleaned = Just(name).map(remove_unwanted).map(breakdown)
    token_list = cleaned.map(List.from_iterable).from_maybe(List.empty())
    tokens = token_list.bind(find_acronym(acronyms))
    return tuple(tokens)
Ejemplo n.º 16
0
    def transform(_, orgs):
        """Synthesize organization names, then attach to each row the
        (already synthetic) name of its parent organization."""
        synthesize = synthetic_org_name_generator()

        def scrub_org(org):
            org_id, name, taxonomies, active, parent_id = org
            if org_id % 10000 == 0:
                # Progress marker for long runs.
                print(org_id)
            taxonomy_args = taxonomies.split(',') if taxonomies else []
            return org_id, synthesize(name, *taxonomy_args), active, \
                   parent_id

        @toolz.curry
        def attach_parent_name(scrubbed_by_id, org):
            parent = scrubbed_by_id.get(org[-1])
            return org + ((parent[1],) if parent else (None,))

        # Index the scrubbed rows by id so parent names can be looked up
        # after every organization has received its synthetic name.
        scrubbed_by_id = {row[0]: row for row in map(scrub_org, orgs)}
        return Just(map(attach_parent_name(scrubbed_by_id),
                        scrubbed_by_id.values()))
Ejemplo n.º 17
0
def scrub_tokenized_name(non_identifiables, med_vocab, names, taxonomies,
                         tokens):
    """
    Synthesize a replacement for one tokenized organization name.

    Callers cache the result on the token tuple to keep consistency i.e. some orgs may have the same name and be
    differentiated by other properties (NPI), we want them to keep the same synthetic name as well.

    We prefer to cache the token list instead of the name itself as punctuation might interfere i.e.
    'MY ORGANIZATION, INC.' = 'MY ORGANIZATION INC.' = 'MY ORGANIZATION INC'

    The scrubbing workflow is such:
    1. Scrub the given tokens
    2. If the result isn't persuasive and there is taxonomies available, append them
    3. If the result is still not persuasive or to complete it, prepend some names

    :param tokens: The list of words found in the organization name (see :tokenize)
    :return: A new list of synthetic tokens
    """
    scrubbed = Just(tokens).map(scrub_list(non_identifiables, med_vocab, names))
    with_taxonomies = scrubbed.map(append_taxonomies(taxonomies, names))
    return with_taxonomies.map(prepend_names(names)).from_just()
Ejemplo n.º 18
0
def scrub_name(non_identifiables, med_vocab, names, acronyms, org_numbers,
               name, *taxonomies):
    """
    Build a synthetic organization name from a real one.

    :param name: The organization name
    :param taxonomies: A list of taxonomies attached to this organization
    :return: The synthetic name, falling back to a numbered placeholder
    when scrubbing leaves nothing usable
    """
    @toolz.curry
    def token_scrubber(tokens):
        """
        Bridge to :scrub_tokenized_name — memoization and currying did
        not combine cleanly in one function, so it is decomposed here.
        """
        return scrub_tokenized_name(non_identifiables, med_vocab, names,
                                    taxonomies, tokens)

    def numbered_fallback(candidate):
        # Keep the scrubbed name unless it is blank once stripped.
        if candidate.strip():
            return candidate
        return 'Organization #{}'.format(next(org_numbers))

    return Just(name)\
        .map(strip_non_characters)\
        .map(tokenize(acronyms))\
        .map(token_scrubber)\
        .map(' '.join)\
        .map(numbered_fallback)\
        .from_just()
Ejemplo n.º 19
0
 def __init__(self, value: Any) -> None:
     """Wrap *value* by delegating straight to the Just base initializer."""
     Just.__init__(self, value)
Ejemplo n.º 20
0
 def rmdir(self, path: str):
     """Remove *path* from the underlying raw store; result wrapped in Just."""
     removed = self.raw().rm(path)
     return Just(removed)
Ejemplo n.º 21
0
 def Name(self):
     """Return this node's '__name__' metadata entry wrapped in Just."""
     raw = self._value.raw()
     return Just(raw['__name__'])
Ejemplo n.º 22
0
# Demo script: the Maybe monad from the third-party `oslash` library.
import time

from oslash import Left, Right, Just, Nothing, IO, Get, ReadFile, Put
from datetime import datetime
'''
One great library to play with Monads is [oslash] to install it just use [pip] command [pip install oslash]
'''
###################
#      MAYBE      #
###################
'''Maybe Monad is based in Haskell Monad, it contains the monad type Just which contains the value, and the Monad
   Nothing which it does not contain any value'''
# Just wraps a present value; printing shows the wrapped content.
just = Just("I think therefor I am")
print(just)
# Nothing represents the absence of a value.
nothing = Nothing()
print(nothing)
'''Transformation with Maybe as Functor it's pretty much the same as in Scala, you just use [map] operator.'''
# Each map applies its function to the wrapped value; the result stays
# inside Just.
justResult = Just("Hello") \
    .map(lambda word: word + " maybe") \
    .map(lambda word: word + " python") \
    .map(lambda word: word + " world") \
    .map(lambda word: word.upper())

print(justResult)
'''In case of type Nothing none function will be apply in the pipeline.'''
# Mapping over Nothing is a no-op: every lambda below is skipped and the
# result is still Nothing.
nothingResult = Nothing() \
    .map(lambda word: word + " maybe") \
    .map(lambda word: word + " python") \
    .map(lambda word: word + " world") \
    .map(lambda word: word.upper())
Ejemplo n.º 23
0
 def isDir(self):
     """Tell whether this node carries the '__dir__' directory flag."""
     raw = self._value.raw()
     return Just(raw['__dir__'])