Exemple #1
0
def setup_parallel():
    """Build the 'MainWorkflow' monitor fixture around a parallel root tracker.

    Trackers are linked with ``with_tracker`` in this shape (FriendlyIds)::

        a  (HasParallelChildren=True)
        |-- b   -> c   (EstimatedSeconds=10)
        |-- br1 -> c1  (EstimatedSeconds=11)
        `-- b2  -> c2  (EstimatedSeconds=12) -> c2 (EstimatedSeconds=12)

    Returns:
        The ProgressMonitor that tracker 'a' was attached to.
    """
    monitor = ProgressMonitor(
        Name='MainWorkflow', DbConnection=MockProgressManager())

    root = ProgressTracker(
        Name='CopyFiles', FriendlyId='a', HasParallelChildren=True)
    folder_b = ProgressTracker(Name='CreateFolder', FriendlyId='b')
    copy_c = ProgressTracker(
        Name='CopyFiles', FriendlyId='c', EstimatedSeconds=10)
    folder_b1 = ProgressTracker(Name='CreateFolder', FriendlyId='br1')
    copy_c1 = ProgressTracker(
        Name='CopyFiles', FriendlyId='c1', EstimatedSeconds=11)
    folder_b2 = ProgressTracker(Name='CreateFolder', FriendlyId='b2')
    copy_c2 = ProgressTracker(
        Name='CopyFiles', FriendlyId='c2', EstimatedSeconds=12)
    # NOTE(review): this tracker reuses FriendlyId 'c2' from the one above;
    # confirm whether the duplicate is intentional before relying on lookups.
    copy_d2 = ProgressTracker(
        Name='CopyFiles', FriendlyId='c2', EstimatedSeconds=12)
    assert root.friendly_id == 'a'

    # Link the lower levels first, then hang the branches on the root, and
    # only then attach the root to the monitor (same order as always).
    lower_links = (
        (folder_b, copy_c),
        (folder_b1, copy_c1),
        (folder_b2, copy_c2),
        (copy_c2, copy_d2),
    )
    for parent, child in lower_links:
        parent.with_tracker(child)
    for branch in (folder_b, folder_b1, folder_b2):
        root.with_tracker(branch)
    monitor.with_tracker(root)
    return monitor
Exemple #2
0
def test_can_total_progress_with_child_events():
    """A monitor reports four children once four linked trackers are attached."""
    monitor = ProgressMonitor(DbConnection=MockProgressManager())
    copy_root = ProgressTracker(Name='CopyFiles', FriendlyId='abc')
    make_folder = ProgressTracker(Name='CreateFolder')
    copy_nested = ProgressTracker(Name='CopyFiles')
    send_email = ProgressTracker(Name='SendEmail')

    copy_root.with_tracker(make_folder)
    make_folder.with_tracker(copy_nested).with_tracker(send_email)
    monitor.with_tracker(copy_root)

    tracked = monitor.all_children
    assert len(tracked) == 4
Exemple #3
0
def test_can_start_all_parents(c_mock, g_mock):
    """Starting a loaded tracker with ``Parents=True`` starts its ancestors.

    Args:
        c_mock: mock object; its ``side_effect`` is set to
            ``children_side_effect``.
        g_mock: mock object; its ``side_effect`` is set to
            ``get_by_id_side_effect``.
    """
    g_mock.side_effect = get_by_id_side_effect
    c_mock.side_effect = children_side_effect
    pm = ProgressMonitor(DbConnection=RedisProgressManager())
    pm = pm.load('94a52a41-bf9e-43e3-9650-859f7c263dc8')
    t = pm.find_id('039fe353-2c01-49f4-a743-b09c02c9f683')
    assert t
    t.start(Parents=True)
    assert t.parent.status == 'In Progress'
    # Parenthesised single-argument print: same output on Python 2, and no
    # longer a SyntaxError on Python 3.
    print(t.parent.id)
    assert t.parent.parent.status == 'In Progress'
Exemple #4
0
def setup_basic():
    """Build the 'MainWorkflow' monitor fixture with a single tracker chain.

    Trackers are linked with ``with_tracker`` in the order
    monitor -> 'a' -> 'b' -> 'c', where 'c' has EstimatedSeconds=10.

    Returns:
        The ProgressMonitor at the head of the chain.
    """
    monitor = ProgressMonitor(
        Name='MainWorkflow', DbConnection=MockProgressManager())
    top = ProgressTracker(Name='CopyFiles', FriendlyId='a')
    middle = ProgressTracker(Name='CreateFolder', FriendlyId='b')
    bottom = ProgressTracker(
        Name='CopyFiles', FriendlyId='c', EstimatedSeconds=10)
    assert top.friendly_id == 'a'

    # Attach from the monitor downwards, one link at a time.
    for parent, child in ((monitor, top), (top, middle), (middle, bottom)):
        parent.with_tracker(child)
    return monitor
Exemple #5
0
    def search(self):
        """Run the configured queries against the index of ``self.corpus``.

        The query list is ``self.queries``; when ``self.dictionary_on`` is set
        and ``self.dictionary_text`` is a list, the dictionary entries are
        appended to it (or replace it, if ``self.queries`` is not a list).

        Two modes, selected by ``QUERY_MODES[self.query_mode]``:

        * ``'filter'``: all queries are OR-ed into one query. ``sample`` holds
          the matching rows (with tokens replaced by the context returned from
          ``index.get_context`` when ``self.context_window`` is set) and
          ``remaining`` holds the corpus rows that did not match.
        * any other mode: a copy of the corpus gets one extra attribute per
          query, filled with the values yielded by
          ``index.search(q, frequencies=True)``. Unless
          ``self.include_unmatched`` is truthy, ``sample`` is restricted to
          rows hit by at least one query and ``remaining`` holds the rest.

        Returns:
            tuple: ``(sample, remaining)``; ``remaining`` is ``None`` in the
            non-filter mode when ``self.include_unmatched`` is truthy.
        """
        queries = self.queries
        if self.dictionary_on and isinstance(self.dictionary_text, list):
            # queries starts as str, but becomes list if queries are given
            if isinstance(queries, list):
                queries = queries + self.dictionary_text
            else:
                queries = self.dictionary_text

        def unselected(selected):
            # Corpus rows whose positions are NOT listed in `selected`.
            mask = np.ones(len(self.corpus))
            mask[selected] = 0
            return self.corpus[np.nonzero(mask)[0]]

        with ProgressMonitor().task(100, 'Starting search..') as monitor:
            monitor.add_listener(self.callback)
            index = get_index(self.corpus, monitor=monitor.submonitor(50))

            if QUERY_MODES[self.query_mode] == 'filter':
                # simple search
                query = " OR ".join('({})'.format(parse_query(q)[1])
                                    for q in queries)
                if not self.context_window:
                    selected = list(index.search(query))
                    sample = self.corpus[selected]
                else:
                    sample = self.corpus.copy()
                    # Copy the token store so the context replacement below
                    # does not write into the original corpus' tokens.
                    sample._tokens = sample._tokens.copy()
                    selected = []
                    for i, context in index.get_context(
                            query, int(self.context_window)):
                        sample._tokens[i] = context
                        selected.append(i)
                    sample = sample[selected]
                remaining = unselected(selected)
            else:
                sample = self.corpus.copy()
                remaining = None
                seen = set()
                for q in queries:
                    label, q = parse_query(q)
                    # todo: implement as sparse matrix!
                    # Builtin float instead of the np.float alias, which was
                    # removed in NumPy 1.24; the resulting dtype is the same.
                    scores = np.zeros(len(sample), dtype=float)

                    for i, j in index.search(q, frequencies=True):
                        seen.add(i)
                        scores[i] = j
                    scores = scores.reshape((len(sample), 1))
                    sample.extend_attributes(scores, [label])
                if not self.include_unmatched:
                    selected = list(seen)
                    remaining = unselected(selected)
                    sample = sample[selected]
            return sample, remaining
Exemple #6
0
def get_index(corpus: Corpus, monitor: ProgressMonitor, multiple_processors=False, **kargs) -> Index:
    """
    Return the index cached on `corpus`, rebuilding it when there is no cached
    index or the cached one was built from a different `corpus._tokens` object.

    :param corpus: corpus to index; the index and its lock are stored on it as
        `_orange3sma_index` and `_orange3sma_index_lock`
    :param monitor: receives the "Getting tokens" / "Creating index" updates
    :param multiple_processors: if true, use all but one CPU (at least 1)
    :param kargs: passed through to `Index`
    """
    # The global lock only protects creation of the per-corpus lock.
    with _GLOBAL_LOCK:
        if not hasattr(corpus, "_orange3sma_index_lock"):
            corpus._orange3sma_index_lock = Lock()

    with corpus._orange3sma_index_lock:
        cached = getattr(corpus, "_orange3sma_index", None)
        if cached and cached.tokens is corpus._tokens:
            return cached

        monitor.update(0, "Getting tokens")
        corpus.tokens  # force tokens
        if multiple_processors:
            procs = max(1, multiprocessing.cpu_count() - 1)
        else:
            procs = 1
        monitor.update(50, "Creating index")
        rebuilt = Index(corpus, procs=procs, **kargs)
        corpus._orange3sma_index = rebuilt
        return rebuilt
Exemple #7
0
def get_counts(
        corpus: Corpus, monitor: ProgressMonitor
) -> Tuple[Mapping[str, int], Mapping[str, int]]:
    """
    Count term and document frequencies over `corpus.tokens`.

    :param corpus: corpus whose `tokens` are iterated document by document
    :param monitor: progress monitor; a subtask of 50 reports per document
    :return: (tf, df) where tf maps word -> total occurrences and
        df maps word -> number of documents containing it
    """
    monitor.update(0, "Getting tokens")
    documents = corpus.tokens  # forces tokens to be created
    total = len(documents)
    term_freq = {}
    docs_with_word = {}  # word -> set of document positions containing it
    monitor.update(50, "Counting words")
    with monitor.subtask(50) as sub:
        sub.begin(total)
        for position, words in enumerate(documents):
            sub.update(
                message="Counting words {i}/{n}".format(i=position, n=total))
            for word in words:
                if word not in docs_with_word:
                    docs_with_word[word] = set()
                docs_with_word[word].add(position)
                term_freq[word] = term_freq.get(word, 0) + 1
        doc_freq = {word: len(hits) for word, hits in docs_with_word.items()}
        return term_freq, doc_freq
Exemple #8
0
 def calculate(self):
     """Compute word statistics for ``self.corpus`` under a progress task.

     Registers ``self.callback`` as a listener on the task's monitor, then
     returns ``compare(...)`` against ``self.reference_corpus`` when that is
     truthy, otherwise ``frequencies(...)`` of the corpus alone.
     """
     task = ProgressMonitor().task(100, 'Calculating statistics..')
     with task as monitor:
         monitor.add_listener(self.callback)
         if not self.reference_corpus:
             return frequencies(self.corpus, monitor=monitor)
         return compare(self.corpus,
                        self.reference_corpus,
                        monitor=monitor)
Exemple #9
0
def compare(corpus: Corpus, reference_corpus: Corpus,
            monitor: ProgressMonitor):
    """
    Build a table comparing word statistics of `corpus` with `reference_corpus`.

    Counts are taken with `get_counts` (each on a submonitor of 40), aligned
    on the union of both vocabularies with 0 for absent words, and passed to
    `_create_table` together with the relative frequencies from `_relfreq`
    and their ratio ("overrepresentation").
    """
    tf1, df1 = get_counts(corpus, monitor.submonitor(40))
    tf2, df2 = get_counts(reference_corpus, monitor.submonitor(40))

    vocabulary = list(set(df1) | set(df2))

    def column(lookup):
        # One integer per vocabulary word, 0 where the word is absent.
        return np.fromiter((lookup.get(w, 0) for w in vocabulary), int)

    counts = column(tf1)
    docfreqs = column(df1)
    refcounts = column(tf2)
    refdocfreqs = column(df2)

    relc = _relfreq(counts)
    relcr = _relfreq(refcounts)

    columns = OrderedDict()
    columns["percent"] = relc
    columns["frequency"] = counts
    columns["docfreq"] = docfreqs
    columns["overrepresentation"] = relc / relcr
    columns["reference_percent"] = relcr
    columns["reference_frequency"] = refcounts
    columns["reference_docfreq"] = refdocfreqs
    return _create_table(vocabulary, columns)
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker
# Example script (Python 2 print syntax): build a nested workflow of trackers
# under one monitor and print its counters before and after starting a task.

# Redis connection settings: localhost:6379, database 0.
pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rroot = RedisProgressManager(RedisConnection=r)
root = ProgressMonitor(DbConnection=rroot, Name="MasterWorkflow")

# Trackers are created unattached; they are linked together further below.
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

# with_tracker calls are chained; presumably each call returns the object it
# was called on, so both arguments attach to the same parent -- TODO confirm.
root.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

# Counters and statuses before anything has been started.
print "Total items in workflow: {}".format(root.all_children_count)
print "Total items not started: {}".format(root.not_started_count)
print task_b2_1.status, wf_b_2.status, wf_b.status, root.status

# NOTE(review): Parents=True presumably also starts the ancestors of the
# task -- confirm against ProgressTracker.start.
task_b2_1.start(Parents=True)
print "Total items started: {}".format(root.in_progress_count)
print "Percentage started: {}".format(root.in_progress_pct)
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker
# Example script: build a nested workflow, start one task together with its
# parents, print the in-progress counters and call update_all().

# Redis connection settings: localhost:6379, database 0.
pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow")

# Trackers are created unattached; they are linked together further below.
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

# with_tracker calls are chained; presumably each call returns the object it
# was called on, so both arguments attach to the same parent -- TODO confirm.
pm.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

# NOTE(review): Parents=True presumably also starts the ancestors of the
# task -- confirm against ProgressTracker.start.
task_b2_1.start(Parents=True)

# Parenthesised single-argument prints give the same output on Python 2 and
# are valid syntax on Python 3.
print("Total items started: {}".format(pm.in_progress_count))
print("Percentage started: {}".format(pm.in_progress_pct))

# NOTE(review): update_all() presumably writes the tree to the backing store
# -- confirm against ProgressMonitor.update_all.
pm.update_all()

# NOTE: `id` shadows the builtin; the name is kept so the example's
# module-level names stay unchanged.
id = wf_b.id
# Second monitor sharing the same Redis-backed manager.
pm2 = ProgressMonitor(DbConnection=rpm)
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
import redis
import time
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker
# Example script (Python 2 print syntax): run one task under a monitor and
# print statuses and elapsed times at each stage. The sleeps only let the
# elapsed-time values advance between prints.

# Redis connection settings: localhost:6379, database 0.
pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow")
task = ProgressTracker(Name='SingleTask', FriendlyId='MyTask')
pm.with_tracker(task)

# Stage 1: nothing started yet.
print pm.status, task.status
# Stage 2: start only the monitor; the return value of start() is printed.
print pm.start()
print pm.status, task.status
time.sleep(1)
print pm.elapsed_time_in_seconds, task.elapsed_time_in_seconds
# Stage 3: start the task one second after the monitor.
task.start()
time.sleep(1)
print pm.status, task.status
print pm.elapsed_time_in_seconds, task.elapsed_time_in_seconds
# Stage 4: mark the task, then the monitor, as succeeded.
task.succeed()
pm.succeed()
print pm.status, task.status
print pm.elapsed_time_in_seconds, task.elapsed_time_in_seconds
Exemple #13
0
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
from progressmonitor import ProgressMonitor, ProgressTracker, DynamoDbDriver
# Example script: build a nested workflow backed by DynamoDbDriver, start one
# task together with its parents, call update_all(), then load the monitor
# again by id and print its counters.

# One driver instance is shared by both monitors below. The second monitor
# was previously given the DynamoDbDriver class itself instead of an
# instance, unlike every other ProgressMonitor construction.
driver = DynamoDbDriver(TablePrefix='test')
root = ProgressMonitor(DbConnection=driver,
                       Name="MasterWorkflow")

# Trackers are created unattached; they are linked together further below.
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

# with_tracker calls are chained; presumably each call returns the object it
# was called on, so both arguments attach to the same parent -- TODO confirm.
root.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

# NOTE(review): Parents=True presumably also starts the ancestors of the
# task -- confirm against ProgressTracker.start.
task_b2_1.start(Parents=True)

# Parenthesised single-argument prints give the same output on Python 2 and
# are valid syntax on Python 3.
print("Total items started: {}".format(root.in_progress_count))
print("Percentage started: {}".format(root.in_progress_pct))

# NOTE(review): update_all() presumably writes the tree to the backing store
# -- confirm against ProgressMonitor.update_all.
root.update_all()

# NOTE: `id` shadows the builtin; the name is kept so the example's
# module-level names stay unchanged.
id = root.id

# A fresh monitor has had no trackers attached when its count is printed.
root2 = ProgressMonitor(DbConnection=driver)
print("Total items: {}".format(root2.all_children_count))

# Rebind root2 to whatever load() returns for the saved id.
root2 = root.load(id)
print("Total items started: {}".format(root2.in_progress_count))
print("Percentage started: {}".format(root2.in_progress_pct))
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
import redis
import time
from progressmonitor import RedisProgressManager, ProgressMonitor, ProgressTracker
# Example script: attach a metric with two dimensions to a tracker, run the
# tracker for about two seconds and print its timing attributes.

# Redis connection settings: localhost:6379, database 0.
pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm)

# `c` is whatever with_metric() returns; it is used as the tracker below.
c = ProgressTracker(Name='TestWorkflow').with_metric(Namespace='dev_testing',
                                                     Metric='OS/Startup')
# Same chained calls as before, wrapped in parentheses rather than joined
# with a backslash continuation.
(c.metric
    .with_dimension('linux_flavor', 'redhat')
    .with_dimension('version', '6.8'))

pm.with_tracker(c)
# NOTE(review): update_all() is called after every state change, presumably
# to write each state to the backing store -- confirm.
pm.update_all()
c.start(Parents=True)
pm.update_all()

# Parenthesised single-argument prints give the same output on Python 2 and
# are valid syntax on Python 3.
print('sleeping')
time.sleep(2)
c.succeed()
pm.update_all()
print(c.elapsed_time_in_seconds)
print(c.start_time)
print(c.finish_time)
Exemple #15
0
def test_can_convert_from_db(c_mock, g_mock):
    g_mock.side_effect = get_by_id_side_effect
    c_mock.side_effect = children_side_effect
    pm = ProgressMonitor(DbConnection=RedisProgressManager())
    pm = pm.load('94a52a41-bf9e-43e3-9650-859f7c263dc8')
    assert len(pm.all_children) == 5
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
import redis
import time
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker
# Example script (Python 2 print syntax): attach three tasks to one monitor
# and start them one second apart.

# Redis connection settings: localhost:6379, database 0.
pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow")
task_a = ProgressTracker(Name='Task A', FriendlyId='TaskA')
task_b = ProgressTracker(Name='Task B', FriendlyId='TaskB')
task_c = ProgressTracker(Name='Task C', FriendlyId='TaskC')
# with_tracker calls are chained; presumably each call returns the monitor,
# so all three tasks attach to `pm` -- TODO confirm.
pm.with_tracker(task_a).with_tracker(task_b).with_tracker(task_c)
print pm.status, task_a.status
# Start only the monitor; the return value of start() is printed.
print pm.start()
print pm.status, task_a.status, task_b.status, task_c.status
# Stagger the task starts by one second each.
time.sleep(1)
task_a.start()
time.sleep(1)
task_b.start()
time.sleep(1)
task_c.start()
print pm.elapsed_time_in_seconds, \
      task_a.elapsed_time_in_seconds, \
      task_b.elapsed_time_in_seconds, \
Exemple #17
0
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker
# Example script: build a workflow whose trackers carry EstimatedSeconds and
# HasParallelChildren settings, then print the monitor's total_estimate.

# Redis connection settings: localhost:6379, database 0.
pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rpm = RedisProgressManager(RedisConnection=r)
pm = ProgressMonitor(DbConnection=rpm, Name="MasterWorkflow")

wf_a = ProgressTracker(Name='Workflow A',
                       FriendlyId='WorkflowA',
                       HasParallelChildren=True)
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
# Only these trackers carry an estimate: 10 s, 20 s and 30 s.
task_a1 = ProgressTracker(Name='Task A-1', EstimatedSeconds=10)
wf_a_1 = ProgressTracker(Name='SubWorkflow A1', HasParallelChildren=True)
wf_a1_1 = ProgressTracker(Name='SubWorkflow A1, Task 1', EstimatedSeconds=20)
wf_a1_2 = ProgressTracker(Name='SubWorkflow A1, Task 2', EstimatedSeconds=30)

# with_tracker calls are chained; presumably each call returns the object it
# was called on, so both arguments attach to the same parent -- TODO confirm.
pm.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a_1.with_tracker(wf_a1_1).with_tracker(wf_a1_2)
wf_a.with_tracker(task_a1).with_tracker(wf_a_1)

# Parenthesised single-argument print gives the same output on Python 2 and
# is valid syntax on Python 3.
print("Total estimated seconds: {}".format(pm.total_estimate))
Exemple #18
0
# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.

# Licensed under the Amazon Software License (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at

#    http://aws.amazon.com/asl/

# or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and limitations under the License.
import redis
from progressmonitor import RedisProgressManager, ProgressMonitor, \
    ProgressTracker
# Example script: build a nested workflow, start one task together with its
# parents, print the in-progress counters, call update_all() and create a
# second monitor on the same Redis-backed manager.

# Redis connection settings: localhost:6379, database 0.
pool = redis.ConnectionPool(host='localhost', port=6379, db=0)
r = redis.Redis(connection_pool=pool)
rroot = RedisProgressManager(RedisConnection=r)
root = ProgressMonitor(DbConnection=rroot, Name="MasterWorkflow")

# Trackers are created unattached; they are linked together further below.
wf_a = ProgressTracker(Name='Workflow A', FriendlyId='WorkflowA')
wf_b = ProgressTracker(Name='Workflow B', FriendlyId='WorkflowB')
wf_b_1 = ProgressTracker(Name='SubWorkflow B1', FriendlyId='WorkflowB1')
wf_b_2 = ProgressTracker(Name='SubWorkflow B2', FriendlyId='WorkflowB2')
task_a1 = ProgressTracker(Name='Task A-1', FriendlyId='TaskA1')
task_a2 = ProgressTracker(Name='Task A-2', FriendlyId='TaskA2')
task_b2_1 = ProgressTracker(Name='Task B2-1', FriendlyId='TaskB21')

# with_tracker calls are chained; presumably each call returns the object it
# was called on, so both arguments attach to the same parent -- TODO confirm.
root.with_tracker(wf_a).with_tracker(wf_b)
wf_b.with_tracker(wf_b_1).with_tracker(wf_b_2)
wf_a.with_tracker(task_a1).with_tracker(task_a2)
wf_b_2.with_tracker(task_b2_1)

# NOTE(review): Parents=True presumably also starts the ancestors of the
# task -- confirm against ProgressTracker.start.
task_b2_1.start(Parents=True)

# Parenthesised single-argument prints give the same output on Python 2 and
# are valid syntax on Python 3.
print("Total items started: {}".format(root.in_progress_count))
print("Percentage started: {}".format(root.in_progress_pct))

# NOTE(review): update_all() presumably writes the tree to the backing store
# -- confirm against ProgressMonitor.update_all.
root.update_all()

# NOTE: `id` shadows the builtin; the name is kept so the example's
# module-level names stay unchanged.
id = root.id
# Second monitor sharing the same Redis-backed manager.
root2 = ProgressMonitor(DbConnection=rroot)