Ejemplo n.º 1
def get_doc_attachments(conn: ConnType,
                        attachment: int,
                        prefix: _PathLike = "",
                        itemIDs: _Optional[_Sequence[str]] = None,
                        **kwargs: int):
    """Returns the paths to the attachments to all documents.

    Parameters
    ----------
    conn : sqlalchemy.engine.Connection
        The connection object to the database.
    attachment : int
        The ID of the item type *attachment*.
    prefix : str, pathlib.Path, or path-like
        The path prefix to prepend.
    itemIDs : list-like of str or None
        The itemID of interest. If None, consider all items.
    **kwargs : int
        Not used. Just to conform the signature with other similar functions.

    Returns
    -------
    pandas.DataFrame
        A frame with a single column, "attachment path", indexed by "itemID" (the parent item of
        each attachment). Each value is ``prefix / key / path`` with the text "storage:" removed
        from ``path``: one path when the item has exactly one attachment, otherwise a list of
        paths. Rows whose stored path is null are dropped.
    """
    # pylint: disable=unused-argument

    # NOTE(review): the WHERE keyword and the closing `.format(...)` were absent from this
    # snippet. They are restored here because the `_sub("WHERE", ...)` rewrite below needs a
    # WHERE to replace and `{attachment}` needs to be filled in -- confirm against upstream.
    query = """
        SELECT itemAttachments.parentItemID, items.key, itemAttachments.path
        FROM items, itemAttachments
        WHERE
            items.itemTypeID = {attachment} AND
            itemAttachments.itemID = items.itemID
    """.format(attachment=attachment)

    if itemIDs is not None:
        # Narrow the query by prepending an extra condition to the WHERE clause.
        query = _sub(
            "WHERE", "WHERE itemAttachments.parentitemID in ({}) AND".format(
                ", ".join(itemIDs)), query)

    table: pandas.DataFrame = pandas.read_sql_query(query, conn)
    table = table.rename(columns={"parentItemID": "itemID"})
    # `axis` is passed by keyword: recent pandas releases no longer accept it positionally.
    table = table.set_index("itemID").dropna(axis=0, subset=["path"])

    prefix = _Path(prefix)
    table["key"] = table["key"].map(prefix.joinpath)
    table["path"] = table["path"].str.replace("storage:", "", regex=False)

    # One full path per attachment row, still indexed by the parent itemID.
    paths: pandas.Series = table.apply(
        lambda row: row["key"].joinpath(row["path"]), axis=1)

    # Collect the paths belonging to the same item into one list per item.
    per_item: pandas.Series = paths.groupby(level=0).aggregate(
        lambda x: x.values.tolist())

    # Unwrap single-element lists so items with one attachment hold a bare path.
    per_item = per_item.apply(lambda x: x[0] if len(x) == 1 else x)

    return per_item.to_frame("attachment path")
Ejemplo n.º 2
def _create_map_job(function, parameter_set, name=None, environment=None, combiner_function=None, _job_type="PIPELINE"):


    # Name the job
    now = _datetime.now().strftime("%b-%d-%Y-%H-%M-%S")
    function_name = _sub("[<>]", "", function.__name__)

    name = "%s-%s" % (function_name, now) if not name else name

    # Validate args
    function, name, environment = _job._validate_job_create_args(function, name, environment)
    _session = _gl.deploy._default_session
    while _session.exists(name, __job.Job._typename):
        rand = str(_uuid.uuid4())[:5]
        old_name = name
        name = "%s-%s" % (name, rand)
        __LOGGER__.info("A job with name '%s' already exists. " "Renaming the job to '%s'." % (old_name, name))

    # Convert SFrame to a dict
    if not parameter_set:
        raise RuntimeError("An empty parameter_set was given. Nothing to do.")

    # If parameter set is a generator/SFrame, make sure it gets expanded out.
    parameter_set_copy = []
    for i in parameter_set:
        if not isinstance(i, dict):
            raise TypeError(
                "'parameter_set' has to be an iterable of dictionary."
                " For void functions, use an empty dictionary as inputs."

    # Create the task.
    task_prototype = _task.Task(function, function_name)
    for_each_iterations = _generate_mapjob_tasks(task_prototype, parameter_set_copy)

    # List of outputs for the final step.
    if not combiner_function:
        list_of_tasks = for_each_iterations[0]
        combiner = _task.Task(combiner_function)

        # The input to this task is all other tasks
        task_name_to_task = {}
        for stage in for_each_iterations:
            for t in stage:
                task_name_to_task[t.name] = t

        list_of_tasks = combiner

    # Create the job
    job = __job.Job(
        name, stages=for_each_iterations, environment=environment, final_stage=list_of_tasks, _job_type=_job_type
    return job
Ejemplo n.º 3
def _extract(token, sentence):
    tokens = ('take', 'pick', 'apply', 'name_it', '$')
    token_idx = {'take': 0, 'pick': 1, 'apply': 2, 'name_it': 3}
    left_token = tokens[token_idx[token]]
    right_token = tokens[token_idx[token] + 1]
    result = _search(left_token + '(.+?)' + right_token, sentence)
    result = _sub('take|pick|apply|name_it', '', result.group())
    result = result.strip()
    return result
Ejemplo n.º 4
def get_doc_authors(conn: ConnType,
                    attachment: int,
                    note: int,
                    author: int,
                    itemIDs: _Optional[_Sequence[str]] = None,
                    **kwargs: int):
    """Returns the last names of the authors of all documents.

    Parameters
    ----------
    conn : sqlalchemy.engine.Connection
        The connection object to the database.
    attachment : int
        The ID of the item type *attachment*.
    note : int
        The ID of the item type *note*.
    author : int
        The ID of the creator type *author*.
    itemIDs : list-like of str or None
        The itemID of interest. If None, consider all items.
    **kwargs : int
        Not used. Just to conform the signature with other similar functions.

    Returns
    -------
    pandas.DataFrame
        The values in the only one column ("author") are lists of strings of last names, ordered
        by "orderIndex". The indices are "itemID"s.
    """
    # pylint: disable=unused-argument

    # NOTE(review): the WHERE keyword was absent from this snippet; restored because the
    # conditions below are otherwise dangling and the itemIDs filter rewrites "WHERE".
    query = """
        SELECT items.itemID, itemCreators.orderIndex, creators.lastName
        FROM items, itemCreators, creators
        WHERE
            items.itemTypeID <> {attachment} AND
            items.itemTypeID <> {note} AND
            itemCreators.itemID = items.itemID AND
            itemCreators.creatorTypeID = {author} AND
            creators.creatorID = itemCreators.creatorID
    """.format(attachment=attachment, note=note, author=author)

    if itemIDs is not None:
        # The snippet called `_sub` without its pattern argument; "WHERE" is the text being
        # replaced so that the extra condition is prepended to the existing clause.
        query = _sub(
            "WHERE",
            "WHERE items.itemID in ({}) AND".format(", ".join(itemIDs)), query)

    table: pandas.DataFrame = pandas.read_sql_query(query, conn)
    # Sort first so that each item's names are aggregated in author order.
    table = table.sort_values(["itemID", "orderIndex"])
    # NOTE(review): the arguments of `.drop(` were cut off in the snippet. "orderIndex" is the
    # only column besides "lastName" left after indexing by itemID -- confirm against upstream.
    table = table.set_index("itemID").drop(columns="orderIndex")

    authors: pandas.DataFrame = table.groupby(level=0).aggregate(
        lambda x: x.values.tolist())
    authors = authors.rename(columns={"lastName": "author"})

    return authors
def _cast_dumbstring(string: str, data_type):
    """Casts, if possible, a raw string returned by the OpenDSS text interface to the type specified in data_type."""

    if data_type == str:
        return string
    if data_type in (int, float):
        if string == '----':  # this happens, f.e., if you define a line by rmatrix, xmatrix and then ask for r0, x0...
            return _np.NaN
            return data_type(string)
    elif data_type == _np.ndarray:
            return _np.asarray(eval(string))
            return matricize_str(
                string.replace('   ', ' ').replace('  ', ' ').replace(
                    ' | ', ';').replace('| ', ';').replace(' |', ';').replace(
                        '|', ';').replace('[  ', '').replace('[ ', '').replace(
                            '').replace('  ]', '').replace(' ]', '').replace(
                                ']', '').replace(', ', ',').replace(' ', ','))
    elif data_type == list:
        dp_str = _sub('[\,|\ ]*(\]|"|\))', '', string)
        dp_str = _sub('(\[|"|\()\ *', '', dp_str)
        items = dp_str.split(',')
            return [int(x) for x in items]
        except ValueError:
                return [float(x) for x in items]
            except ValueError:
                return items
        raise TypeError(
            'Could not cast the DSS property string "{1}": type {0} unknown'.
            format(str(data_type), string))
Ejemplo n.º 6
    def convert_length(self, string):
        """ Does a math like `10 meters to kilometers` """
            string = string.lower().replace(" ", "")
            formats = _sub(r"\d+", "", string)
            number = int(string.replace(formats, "").strip(",."))
            first_format, second_format = formats.split("to")
            assert first_format != second_format
            assert (-999999 < number < 999999999)

            for format in LengthFormats._ALL.value:
                if first_format in getattr(LengthFormats, format).value[0]:
                    first_format = getattr(LengthFormats, format).value[1]
                elif second_format in getattr(LengthFormats, format).value[0]:
                    second_format = format

            return f'{eval(f"{number}{first_format[LengthFormats._ALL.value.index(second_format)]}")} {second_format.lower()}'
            raise error_message("Unsupported or invalid calculation.")
Ejemplo n.º 7
    def func(conn: ConnType,
             itemIDs: _Optional[_Sequence[str]] = None,
             **mapping) -> pandas.Series:
        """Returns a list of all items' {0}s.

        Notes
        -----
        1. Items with "attachment" type are ignored.
        2. Only return items with non-NaN values.

        Parameters
        ----------
        conn : sqlalchemy.engine.Connection
            The connection object to the database.
        itemIDs : list-like of str or None
            The itemID of interest. If None, consider all items.
        **mapping : keyword-values
            The mapping from required keys to values used in query strings.

        Returns
        -------
        results
            All items' {0}s, where the indices are the `itemID`s, and it only has one column.
        """
        # `query`, `org_tag`, `new_tag` and `after` are not defined in this block; they are
        # taken from the enclosing scope.
        # NOTE(review): the "WHERE" pattern, the trailing `query)` and the `else:` line were
        # cut from this snippet; restored to mirror the sibling query builders -- confirm.
        if itemIDs is not None:
            Q = _sub(
                "WHERE",
                "WHERE items.itemID in ({}) AND".format(", ".join(itemIDs)),
                query)
        else:
            Q = query

        # Placeholders in the query text are filled from **mapping only after the optional
        # itemID filter has been spliced in.
        frame: pandas.DataFrame = pandas.read_sql_query(
            Q.format(**mapping), conn)
        frame = frame.set_index("itemID").rename(
            {org_tag: new_tag}, axis=1)
        return after(frame)
Ejemplo n.º 8
def create(function, name=None, environment=None, **kwargs):
    Execute arbitrary functions in a remote environment.

    The job is specified as a function. All functions that are called from
    within the function are automatically captured. By default, this method will
    kick off asynchronous work, and return a Job object to monitor/manage that

    function : function
        Function to be executed in this Job, with arguments to pass to this
        function specified by `kwargs`.

    name : str, optional
        Name for this execution (names the returned Job). If set to None, then
        the name of the job is set to the name of the function with a time-stamp.
        Valid characters in job name include: digits, characters, '-' and '_'.

    environment : :class:`~graphlab.deploy.hadoop_cluster.HadoopCluster` | :class:`~graphlab.deploy.ec2_cluster.Ec2Cluster` | :class:`~graphlab.deploy.LocalAsync`, optional
        Optional environment for execution. If set to None, then a `LocalAsync`
        by the name `async` is created and used. This will execute the code in
        the background on your local machine.

        Function kwargs that are passed to the function for execution.

    job : :py:class:`~graphlab.deploy.Job`
        Used for monitoring and managing the execution of the Job.

    See Also
    graphlab.deploy.map_job.create, graphlab.deploy.Job

    Let us start out with a simple example to execute a function that can
    add two numbers.

    .. sourcecode:: python

        # Define a function
        def add(x, y):
            return x + y

        # Create a job.
        job = graphlab.deploy.job.create(add, x=1, y=1)

        # Get results from the execution when ready. This call waits for the
        # job to complete before retrieving the results.
        >>> print job.get_results()

    Exceptions within the function calls can be captured as follows:

    .. sourcecode:: python

        def add(x, y):
            if x and y:
                return x + y
                raise ValueError('x or y cannot be None')

        # Job execution capture the exception raised by the function.
        job = graphlab.deploy.job.create(add, x=1, y=None)

        # Get results from the execution when ready. This call waits for the
        # job to complete before retrieving the results.
        >>> print job.get_results()

        # Get the exceptions raised from this execution by calling
        # job.get_metrics()
        >>> print job.get_metrics()
        | task_name | status | start_time | run_time |   exception_message   |
        |    add    | Failed | 1427928898 |   None   | x or y cannot be None |
        |      exception_traceback      |
        | Traceback (most recent cal... |
        [1 rows x 6 columns]

    If a function requires a package to be installed, the function can be
    annotated with a decorator.

    .. sourcecode:: python

        def my_function(number = 10):
            import names
            people = [names.get_full_name() for i in range(number)]
            sf = graphlab.SFrame({'names':people})
            return sf

        job = graphlab.deploy.job.create(my_function)

        >>> print job.get_results()

                names    str

        |       names       |
        |   Annette Logan   |
        |   Nancy Anthony   |
        |  Tiffany Zupancic |
        |    Andre Coppin   |
        |     Robert Coe    |
        |    Donald Dean    |
        |    Lynne Bunton   |
        |   John Sartwell   |
        |   Peter Nicholas  |
        | Chester Rodriguez |
        [10 rows x 1 columns]

    Complex functions that require SFrames, GraphLab models etc. can be deployed
    with ease. All additional state required by the function are automatically

    .. sourcecode:: python

        GLOBAL_CONSTANT = 10

        def foo(x):
            return x + 1

        def bar(x):
            return x + 2

        def my_function(x, y):
            foo_x = foo(x)
            bar_y = bar(y)
            return foo_x + bar_y + GLOBAL_CONSTANT

        # Automatically captures all state needed by the deployed function.
        job = graphlab.deploy.job.create(my_function, x = 1, y = 1)

        >>> print job.get_results()

    You can execute the same job remotely by passing a different environment.

    .. sourcecode:: python

        # Define a function
        def add(x, y):
            return x + y

        # Define an EC2 environment
        ec2 = graphlab.deploy.Ec2Config()

        # Create an EC2 cluster object
        c = graphlab.deploy.ec2_cluster.create('my_cluster', 's3://bucket/path', ec2)

        # Create a job.
        job = graphlab.deploy.job.create(add, environment=c, x=1, y=1)

        >>> print job.get_results()

    - When an exception is raised within the deployed function,
      :func:`~graphlab.deploy.Job.get_results` returns None.

    - For asynchronous jobs, :func:`~graphlab.deploy.Job.get_results` is a
      blocking call which will wait for the job execution to complete
      before returning the results.

    _session = _gl.deploy._default_session



    # Name the job
    now = _datetime.now().strftime('%b-%d-%Y-%H-%M-%S')
    function_name = _sub('[<>]', '', function.__name__)

    name = '%s-%s' % (function_name, now) if not name else name
    # Validate args
    function, name, environment = _validate_job_create_args(
        function, name, environment)
    while _session.exists(name, _job.Job._typename):
        rand = str(_uuid.uuid4())[:5]
        old_name = name
        name = "%s-%s" % (name, rand)
        __LOGGER__.info("A job with name '%s' already exists. "
                        "Renaming the job to '%s'." % (old_name, name))

    # Setup the task & job
    task = _task.Task(function, function_name)
    job = _job.Job(name,
    # Setup the env.
    __LOGGER__.info("Validation complete. Job: '%s' ready for execution." %
    exec_env = _env._get_execution_env(environment)
    job = exec_env.run_job(job)

    # Save the job and return to user
    if not isinstance(environment, _environment.Local):
        __LOGGER__.info("Job: '%s' scheduled." % name)
        __LOGGER__.info("Job: '%s' finished." % name)

    return job
Ejemplo n.º 9
 def _name_from_function(func):
     return _sub("_", " ", func.__name__)
Ejemplo n.º 10
    return idx

def remove_falsey_values(iterable) -> list:
    """Return the truthy items of *iterable* as a list, in their original order."""
    return [item for item in iterable if item]

def image_url_parser(img: str) -> dict:
    """Split an image URL at its first ``._V1`` marker.

    Returns a dict with:
      * ``"full"`` -- everything before the first ``._V1`` (the whole string
        when the marker is absent);
      * ``"template_height"`` -- that prefix followed by the literal text
        ``._V1_SY{{height}}.jpg`` (the double braces are kept verbatim).
    """
    # example of a clean URL -> https://m.media-amazon.com/images/M/MV5BMjIxMjgxNTk0MF5BMl5BanBnXkFtZTgwNjIyOTg2MDE@
    base_url, _, _ = img.partition("._V1")
    return {
        "full": base_url,
        "template_height": base_url + "._V1_SY{{height}}.jpg",
    }

def sanitize_str(movie):
    """Drop every non-word character and underscore from *movie*, then lowercase it."""
    word_chars_only = _sub(r"([^\w]|_)", "", movie)
    return word_chars_only.strip().lower()

def get_page(url: str) -> Soup:
    """Request *url* with ``BASIC_HEADERS`` and wrap the response text in ``Soup``.

    A debug line naming the URL is printed before the request is made.
    ``BASIC_HEADERS`` and ``lib`` are module-level names defined outside this
    block.
    """
    print("[debug] Requesting:", url)
    response = requests.get(url, headers=BASIC_HEADERS)
    html = response.text
    return Soup(html, lib)

def next_table(el):
    """Return ``el.find_next("table")``, or ``None`` when *el* is falsy."""
    if not el:
        return None
    return el.find_next("table")

def resp_template(r_type, dct: dict) -> dict:
    """Wrap *dct* as ``{"data": {r_type: dct}}``."""
    payload = {r_type: dct}
    return {"data": payload}
Ejemplo n.º 11
 def _unescape_part( part ):
     """Reverse the escaping of one part: restore the delimiter, then the escape."""
     delim_pattern = _vESC + _vDEXOR
     escape_pattern = _vESC + _vEEXOR
     # the delimiter is unescaped FIRST
     delim_restored = _sub( delim_pattern, _vDELIM, part )
     # the escape itself is unescaped SECOND
     return _sub( escape_pattern, _vESC, delim_restored )
Ejemplo n.º 12
 def _name_from_function(func: ExampleFunction) -> str:
     """Return ``func.__name__`` with every underscore replaced by a space."""
     identifier = func.__name__
     return _sub("_", " ", identifier)
Ejemplo n.º 13
 def _unescape_part(part):
     """Reverse the escaping of one part: restore the delimiter, then the escape."""
     escaped_delim = _vESC + _vDEXOR
     escaped_escape = _vESC + _vEEXOR
     # step 1: the delimiter is unescaped FIRST
     with_delims = _sub(escaped_delim, _vDELIM, part)
     # step 2: the escape is unescaped SECOND
     return _sub(escaped_escape, _vESC, with_delims)
Ejemplo n.º 14
 def _escape_part( part ):
     """Escape one part: the escape marker first, then the delimiter.

     Anything whose exact type is not ``bytes`` is converted with ``.encode()``
     before the substitutions run.
     """
     if type(part) is bytes:
         raw = part
     else:
         raw = part.encode()
     #the escape is escaped FIRST
     escape_done = _sub(_vESC, _vESC + _vEEXOR, raw )
     #the delimiter is escaped SECOND
     return _sub(_vDELIM, _vESC + _vDEXOR, escape_done)
Ejemplo n.º 15
 def _name_from_function(func: Callable) -> str:
     return _sub("_", " ", func.__name__)
Ejemplo n.º 16
 def _escape_part(part):
     """Escape one part: the escape marker first, then the delimiter.

     Anything whose exact type is not ``bytes`` is converted with ``.encode()``
     before the substitutions run.
     """
     payload = part if type(part) is bytes else part.encode()
     # step 1: escape the escape marker FIRST
     escape_done = _sub(_vESC, _vESC + _vEEXOR, payload)
     # step 2: escape the delimiter SECOND
     fully_escaped = _sub(_vDELIM, _vESC + _vDEXOR, escape_done)
     return fully_escaped
 def postprocess(self, seg):
     '''Remove BPE markers from ``seg.tgt`` in place.

     Every "@@ " joiner is deleted, as is a trailing "@@" (optionally followed
     by one space) at the end of the string. The cleaned text is written back
     to ``seg.tgt``.
     '''
     # The docstring was previously left unterminated, which swallowed this
     # statement into the string literal.
     seg.tgt = _sub("(@@ )|(@@ ?$)", '', seg.tgt)
Ejemplo n.º 18
	def remove(self,n):
		"""Return an ``lzstr`` built from ``self`` with every match of ``str(n)`` deleted.

		``str(n)`` is handed to ``_sub`` as the pattern argument.
		NOTE(review): if ``_sub`` is ``re.sub``, regex metacharacters in ``n``
		are interpreted rather than matched literally -- confirm this is intended.
		"""
		return lzstr(_sub(str(n), "",self))
Ejemplo n.º 19
def create(function, name=None, environment=None, **kwargs):
    Execute arbitrary functions in a remote environment.

    The job is specified as a function. All functions that are called from
    within the function are automatically captured. By default, this method will
    kick off asynchronous work, and return a Job object to monitor/manage that

    function : function
        Function to be executed in this Job, with arguments to pass to this
        function specified by `kwargs`.

    name : str, optional
        Name for this execution (names the returned Job). If set to None, then
        the name of the job is set to the name of the function with a time-stamp.
        Valid characters in job name include: digits, characters, '-' and '_'.

    environment : :class:`~graphlab.deploy.hadoop_cluster.HadoopCluster` | :class:`~graphlab.deploy.ec2_cluster.Ec2Cluster` | :class:`~graphlab.deploy.LocalAsync`, optional
        Optional environment for execution. If set to None, then a `LocalAsync`
        by the name `async` is created and used. This will execute the code in
        the background on your local machine.

        Function kwargs that are passed to the function for execution.

    job : :py:class:`~graphlab.deploy.Job`
        Used for monitoring and managing the execution of the Job.

    See Also
    graphlab.deploy.map_job.create, graphlab.deploy.Job

    Let us start out with a simple example to execute a function that can
    add two numbers.

    .. sourcecode:: python

        # Define a function
        def add(x, y):
            return x + y

        # Create a job.
        job = graphlab.deploy.job.create(add, x=1, y=1)

        # Get results from the execution when ready. This call waits for the
        # job to complete before retrieving the results.
        >>> print job.get_results()

    Exceptions within the function calls can be captured as follows:

    .. sourcecode:: python

        def add(x, y):
            if x and y:
                return x + y
                raise ValueError('x or y cannot be None')

        # Job execution capture the exception raised by the function.
        job = graphlab.deploy.job.create(add, x=1, y=None)

        # Get results from the execution when ready. This call waits for the
        # job to complete before retrieving the results.
        >>> print job.get_results()

        # Get the exceptions raised from this execution by calling
        # job.get_metrics()
        >>> print job.get_metrics()
        | task_name | status | start_time | run_time |   exception_message   |
        |    add    | Failed | 1427928898 |   None   | x or y cannot be None |
        |      exception_traceback      |
        | Traceback (most recent cal... |
        [1 rows x 6 columns]

    If a function requires a package to be installed, the function can be
    annotated with a decorator.

    .. sourcecode:: python

        @graphlab.deploy.required_packages(['names == 0.3.0'])
        def my_function(number = 10):
            import names
            people = [names.get_full_name() for i in range(number)]
            sf = graphlab.SFrame({'names':people})
            return sf

        job = graphlab.deploy.job.create(my_function)

        >>> print job.get_results()

                names    str

        |       names       |
        |   Annette Logan   |
        |   Nancy Anthony   |
        |  Tiffany Zupancic |
        |    Andre Coppin   |
        |     Robert Coe    |
        |    Donald Dean    |
        |    Lynne Bunton   |
        |   John Sartwell   |
        |   Peter Nicholas  |
        | Chester Rodriguez |
        [10 rows x 1 columns]

    Complex functions that require SFrames, GraphLab models etc. can be deployed
    with ease. All additional state required by the function are automatically

    .. sourcecode:: python

        GLOBAL_CONSTANT = 10

        def foo(x):
            return x + 1

        def bar(x):
            return x + 2

        def my_function(x, y):
            foo_x = foo(x)
            bar_y = bar(y)
            return foo_x + bar_y + GLOBAL_CONSTANT

        # Automatically captures all state needed by the deployed function.
        job = graphlab.deploy.job.create(my_function, x = 1, y = 1)

        >>> print job.get_results()

    You can execute the same job remotely by passing a different environment.

    .. sourcecode:: python

        # Define a function
        def add(x, y):
            return x + y

        # Define an EC2 environment
        ec2 = graphlab.deploy.Ec2Config()

        # Create an EC2 cluster object
        c = graphlab.deploy.ec2_cluster.create('my_cluster', 's3://bucket/path', ec2)

        # Create a job.
        job = graphlab.deploy.job.create(add, environment=c, x=1, y=1)

        >>> print job.get_results()

    - When an exception is raised within the deployed function,
      :func:`~graphlab.deploy.Job.get_results` returns None.

    - For asynchronous jobs, :func:`~graphlab.deploy.Job.get_results` is a
      blocking call which will wait for the job execution to complete
      before returning the results.

    _session = _gl.deploy._default_session



    # Name the job
    now = _datetime.now().strftime('%b-%d-%Y-%H-%M-%S')
    function_name = _sub('[<>]','',function.__name__)

    name = '%s-%s' % (function_name, now) if not name else name
    # Validate args
    function, name, environment = _validate_job_create_args(function,
                                                            name, environment)
    while _session.exists(name, _job.Job._typename):
        rand = str(_uuid.uuid4())[:5]
        old_name = name
        name = "%s-%s" % (name, rand)
        __LOGGER__.info("A job with name '%s' already exists. "
                        "Renaming the job to '%s'." % (old_name, name))

    # Setup the task & job
    task = _task.Task(function,function_name)
    job = _job.Job(name, stages=[[task]], environment=environment,
    # Setup the env.
    __LOGGER__.info("Validation complete. Job: '%s' ready for execution." % name)
    exec_env = _env._get_execution_env(environment)
    job = exec_env.run_job(job)

    # Save the job and return to user
    if not isinstance(environment, _environment.Local):
        __LOGGER__.info("Job: '%s' scheduled." % name)
        __LOGGER__.info("Job: '%s' finished." % name)

    return job
Ejemplo n.º 20
Archivo: base.py Proyecto: yassu/Yson
def obtain_suitable_comment(text, reg):
    """Replace every part of *text* that matches the pattern *reg* with one space.

    The description line here was previously bare prose rather than a string
    literal, which made the function a syntax error.
    """
    return _sub(reg, ' ', text)
 def postprocess_str(self, str):
     """Return *str* with BPE markers removed.

     Deletes every "@@ " joiner and a trailing "@@" (optionally followed by one
     space) at the end of the string.
     """
     bpe_marker = "(@@ )|(@@ ?$)"
     return _sub(bpe_marker, '', str)