def parse_doctype_range(rangestr):
    """Return a list of the doctypes in the range specified expanded as a list 
    of integers. This is used to validate arguments. The actual range strings
    are passed onto the superfastmatch server.

    >>> parse_doctype_range('1-2:5:7-9')
    [1, 2, 5, 7, 8, 9]
    >>> parse_doctype_range('')
    Traceback (most recent call last):
        ...
    Exception: Invalid doctype range ()
    >>> parse_doctype_range('1')
    [1]
    >>> parse_doctype_range('7-7')
    [7]
    """
    if not rangestr:
        raise Exception('Invalid doctype range ({0})'.format(rangestr))

    split_on_hyphen = lambda s: s.split('-')

    def expand(rng):
        if len(rng) == 1:
            return int(rng[0])
        elif len(rng) == 2:
            return range(int(rng[0]), int(rng[1]) + 1)
        else:
            raise Exception('Unrecognized range data type')

    return (stream.Stream(rangestr.split(':'))
            >> stream.map(split_on_hyphen)
            >> stream.map(expand)
            >> stream.flatten
            >> list)
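
# For readers unfamiliar with the stream library's ">>" pipelines, here is a
# minimal stream-free sketch of the same expansion (hypothetical helper, not
# part of the original module; it matches only the behavior shown in the
# doctests above):
def parse_doctype_range_plain(rangestr):
    if not rangestr:
        raise Exception('Invalid doctype range ({0})'.format(rangestr))
    result = []
    for part in rangestr.split(':'):
        bounds = part.split('-')
        if len(bounds) == 1:
            result.append(int(bounds[0]))
        elif len(bounds) == 2:
            result.extend(range(int(bounds[0]), int(bounds[1]) + 1))
        else:
            raise Exception('Unrecognized range data type')
    return result

assert parse_doctype_range_plain('1-2:5:7-9') == [1, 2, 5, 7, 8, 9]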
Example #2
def Gregory(type=float):
    """Return partial sums of the Gregory series converging to atan(1) == pi/4.

    Yield 1 - 1/3 + 1/5 - 1/7 + ... computed with the given type.
    """
    return (seq(type(1), step=2) >> map(lambda x: 1 / x) >> alt_sign
            >> fold(operator.add))
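
# A stream-free sanity check of the same series (sketch, standard library
# only): the n-term partial sum of 1 - 1/3 + 1/5 - ... approaches
# atan(1) == pi/4 with error on the order of 1/(2n).
import math

def gregory_partial_sum(n_terms):
    total = 0.0
    for k in range(n_terms):
        term = 1.0 / (2 * k + 1)
        total += term if k % 2 == 0 else -term
    return total

assert abs(gregory_partial_sum(1000) - math.atan(1)) < 1e-3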
Example #3
    def run(self):
        if self.type == "TRANSIENT":
            sink = item[:1]
        else:
            sink = min

        dataFilter = MovAvg(10)
        nop = map(myPrint1)
        elements = [nop, nop, nop, nop, nop]

        elements[0] = map(myPrint1)
        elements[1] = map(lambda x: dataFilter(x))
        elements[2] = filter(lambda x: bigVals(x, 40))
        elements[3] = map(notify_ctrl)
        # elements[4] stays the no-op mapper assigned above

        (self.myGen() >> elements[0] >> elements[1] >> elements[2]
                      >> elements[3] >> elements[4] >> sink)
Example #4
def extract_sopr(options):
    if not os.path.exists(ORIG_DIR):
        mkdir_p(ORIG_DIR)

    if options.get('loglevel', None):
        log.setLevel(options['loglevel'])

    cache_paths = glob(os.path.join(CACHE_DIR, 'sopr/*/*/*.zip'))
    log.debug("cache paths ({num}):".format(num=len(cache_paths)) +
              "\n\t".join(cache_paths))

    extracted = cache_paths >> filter(lambda x: check_ext(x, ext='.zip')) \
                            >> map(lambda p: translate_dir(p,
                                                           from_dir=CACHE_DIR,
                                                           to_dir=ORIG_DIR)) \
                            >> ThreadPool(extract_all_zips)

    for path, destination_dir, num_files in extracted:
        log.info("successfully extracted " +
                 "{path} to {dest_dir} ({num} files)".format(
                    path=path, dest_dir=destination_dir, num=num_files))

    for url, exception in extracted.failure:
        log.error("extracting from {url} failed: {exception}".format(
            url=url, exception=exception))
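
# The filter -> map -> thread-pool shape above, reduced to a standard-library
# sketch (Python 3, concurrent.futures; toy_worker is a hypothetical stand-in
# for extract_all_zips):
from concurrent.futures import ThreadPoolExecutor

def toy_worker(path):
    # stand-in for the real extraction step
    return path.upper()

paths = ['a.zip', 'b.txt', 'c.zip']
zips = [p for p in paths if p.endswith('.zip')]
with ThreadPoolExecutor(max_workers=4) as pool:
    for result in pool.map(toy_worker, zips):
        print(result)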
Example #6
def shutdown(poolclass, n):
    e = Executor(poolclass, map(lambda x: x * x), poolsize=2)
    e.submit(*range(n))
    e.shutdown()
    print e.result >> list
    assert not e.inputfeeder_thread.is_alive()
    assert not e.resulttracker_thread.is_alive()
    assert not e.failuretracker_thread.is_alive()
Example #7
def cancel(poolclass, n):
    e = Executor(poolclass, map(lambda x: x * x), poolsize=2)
    t1 = threading.Thread(target=lambda: e.submit(*range(n // 2)))
    t2 = threading.Thread(target=lambda: e.submit(*range(n // 2)))
    t1.start()
    t2.start()
    cancelled = e.cancel(*range(0, n, 2))
    t1.join()
    t2.join()
    e.close()
    completed = len(e.result >> list)
    print completed, cancelled
    assert completed + cancelled == n
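
# Usage sketch (hypothetical arguments): two threads submit inputs while the
# even ones are cancelled; every input ends up either completed or cancelled,
# which is what the assertion above checks.
#   cancel(ThreadPool, 1000)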
Example #9
def confirm_download_schedule(schedule):
    """Reports the total number of bytes and total number of files
    to download. Also lists the inaccessible files (based on HEAD
    response). Then asks user to confirm downloading.
    """
    def content_length(tpl):
        return tpl[2][1]

    def status_code(tpl):
        return tpl[2][0]

    def href(tpl):
        return tpl[0]

    def is_OK(tpl):
        return status_code(tpl) == 200

    def not_OK(tpl):
        return status_code(tpl) != 200

    increment = lambda x, _: x + 1
    file_count = (
        schedule >> stream.filter(is_OK) >> stream.reduce(increment, 0))

    bytes_to_download = (
        schedule >> stream.filter(is_OK) >> stream.map(content_length) >> sum)

    inaccessible_files = (schedule >> stream.filter(not_OK) >> list)

    if len(inaccessible_files) > 0:
        print
        print "Some files are inaccessible:"
        for (idx, sched) in enumerate(inaccessible_files):
            print "%d: %d %s" % (idx, status_code(sched), href(sched))

    if bytes_to_download > 0:
        print
        print "Need to download %s in %d files." % (
            pretty_bytes(bytes_to_download), file_count)
        print
        print "Are you sure you want to continue? [Y/n]"
        user_input = raw_input("> ")
        return (user_input.upper() in ("", "Y", "YES"))
    else:
        print
        print "Nothing to download."
        return False
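
# Shape of the schedule entries consumed above (sketch): tpl[0] is the href
# and tpl[2] is (status_code, content_length); the middle field is unused by
# this function, so None stands in for it in this toy schedule.
toy_schedule = [
    ('http://example.com/a.bin', None, (200, 1024)),
    ('http://example.com/b.bin', None, (404, 0)),
]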
Example #12
File: f.py Project: Answeror/lit
    def __call__(self):
        """Use mutex to protect self.d."""
        try:
            with QMutexLocker(self.master.mutex):
                #t = QTime()
                #t.start()
                #for node in self.master.tree.nodes:
                    #if not self.stopped:
                        #node.update(self.master.tree.query, self.query)
                        #self.master.tree.query = self.query
                self.master.tree.update(self.query)
                #print(t.elapsed())

                nodes = sorted(self.master.tree.nodes, key=lambda node: node())\
                        >> sm.map(lambda node: Runnable(node, self.master.worker))\
                        >> sm.item[:self.upper_bound]
                model = RunnableModel(nodes)
                self._finished = not self.stopped
                return None if self.stopped else model
        except Exception as e:
            print(e)
Example #13
# define the weights we need for moving averages
weights = [1.0/win_len for i in range(win_len)]
def inner(window):
    """Compute the inner product of window and weights.

    weights must be defined outside to avoid a useless rezipping
    when using this in a stream.
    """
    return sum(i * w for i, w in zip(window, weights))

# get an infinite stream of uniform random floats
zsource = repeatcall(rand.random)

# WIRING
# make our moving-average window
winstream = zsource >> chop(win_len)
# compute the windowed average
xstream = winstream >> stream.map(inner)

# EXECUTING
if view_len > 0:
    ts = time()
    for i in range(view_len):
        fp.write(str(next(xstream.iterator)) + '\n')
    elapsed = time() - ts
    print("time: %f" % elapsed, file=sys.stderr)
    print("items_per_sec: %f" % (view_len / elapsed), file=sys.stderr)
if view_len < 0:
    while True:
        fp.write(str(next(xstream.iterator)) + '\n')
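
# Sanity check of the windowed inner product (sketch; uses local copies of
# win_len/weights so it is self-contained): with uniform weights, a constant
# window averages to that constant.
demo_len = 5
demo_weights = [1.0 / demo_len] * demo_len
demo_avg = sum(i * w for i, w in zip([3.0] * demo_len, demo_weights))
assert abs(demo_avg - 3.0) < 1e-12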
Example #14
real    0m7.231s
user    0m7.046s
sys     0m0.020s

$ time python ./feeder.py -s  # sequential

real    0m13.072s
user    0m7.596s
sys     0m0.067s
"""


def blocking_producer():
    for n in range(25):
        time.sleep(0.01)
        yield 42


if __name__ == '__main__':
    f = lambda x: x**x**3
    import sys
    try:
        if sys.argv[1] == '-s':
            ## use a single thread
            blocking_producer() >> map(f) >> reduce(operator.add)
        elif sys.argv[1] == '-t':
            ## use a feeder in a separate thread
            ThreadedFeeder(blocking_producer) >> map(f) >> reduce(operator.add)
    except IndexError:
        ## use a feeder in a child process
        ForkedFeeder(blocking_producer) >> map(f) >> reduce(operator.add)
Example #15
def load_image(flatten=True):
    def load_image_inner(image_filename):
        return Bunch(image_filename=image_filename,
                     image=imread(image_filename, flatten=flatten))

    return stream.map(load_image_inner)
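
# Hypothetical usage (sketch; file names are illustrative and must exist):
#   bunches = ['a.png', 'b.png'] >> load_image(flatten=True) >> list
#   bunches[0].image           # the pixel array
#   bunches[0].image_filename  # the path it was read from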
Example #16
    odd_ans = reduce(ops.add, odds)
    static_time = time() - ts

    #streaming computation

    # create our filters
    cong_2 = lambda x: x % 2 == 0
    evens = filter(cong_2)
    odds = filter(lambda x: not cong_2(x))
    ts = time()
    # wire the split into the filters
    instream >> tee(evens)
    instream >> odds

    # wire up the map and fold (scan/accumulate)
    foldedevens = (evens >> stream.map(math.sqrt) >> fold(ops.add))
    print(time() - ts)
    sqrtodds = odds >> (stream.Processor(my_sqrt))
    print("established the sqrter %f" % (time() - ts))
    foldedodd = sqrtodds >> stream.fold(ops.add)
    print("made odd folder: %f" % (time() - ts))
    # force execution
    soans = foldedodd >> item[-1:]
    print(soans)
    print(time() - ts)
    seans = foldedevens >> item[:]
    print(time() - ts)
    stream_time = time() - ts

    #print(even_ans)
    #print(seans >> item[:])
Example #19
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

import stream


## Test scenario based on ../example/feeder.py

def blocking_producer():
    for n in range(25):
        time.sleep(0.01)
        yield 42

f = lambda x: x ** 2

expected = blocking_producer() >> stream.map(f) >> stream.reduce(operator.add)


## Test cases

def test_ThreadedFeeder():
    result = (stream.ThreadedFeeder(blocking_producer)
              >> stream.map(f) >> stream.reduce(operator.add))
    pprint(result)
    assert result == expected


def test_ForkedFeeder():
    result = (stream.ForkedFeeder(blocking_producer)
              >> stream.map(f) >> stream.reduce(operator.add))
    pprint(result)
    assert result == expected
Example #22
def alternating(n):
    values = []
    for i in range(1, n + 1):
        values.append(i)
        values.append(-i)
    return values


def randomized(n):
    values = []
    for _ in range(n):
        values.append(randint(-sys.maxint, sys.maxint))
    return values

dataset = []

for v in [10, 100, 1000] >> stream.map(alternating):
    dataset.append(v)

for v in [10, 100, 1000] >> stream.map(randomized):
    dataset.append(v)

func = stream.filter(lambda x: x & 1)

resultset = dataset >> stream.map(lambda s: s >> func >> set) >> list


## Test scenario

def threadpool(i):
    result = dataset[i] >> stream.ThreadPool(func, poolsize=2) >> set
    pprint(result)
Example #26
def submit(poolclass, n):
    e = Executor(poolclass, map(lambda x: x * x), poolsize=3)
    e.submit(*range(n))
    e.close()
    assert sum(e.result) == result[n]
Example #28
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from stream import ThreadedFeeder, ForkedFeeder, map, reduce

## Test scenario based on ../example/feeder.py


def blocking_producer():
    for n in range(25):
        time.sleep(0.01)
        yield 42


f = lambda x: x**2

expected = blocking_producer() >> map(f) >> reduce(operator.add)

## Test cases


def test_ThreadedFeeder():
    result = (ThreadedFeeder(blocking_producer)
              >> map(f) >> reduce(operator.add))
    pprint(result)
    assert result == expected


def test_ForkedFeeder():
    result = ForkedFeeder(blocking_producer) >> map(f) >> reduce(operator.add)
    pprint(result)
    assert result == expected