Example #1
def generate(config, dnat=False, test=True):
    public_ip = config["public_ip"]
    current_ip = config["base_ip"]
    dnsmasq_content = ""
    for group in config["groups"].values():
        if not dnat:
            c = chunks([proxy["domain"] for proxy in group["proxies"]], 5)
        else:
            c = chunks([proxy["domain"] for proxy in group["proxies"] if proxy["dnat"]], 5)

        for chunk in c:
            if not dnat:
                dnsmasq_content += generate_dns(chunk, public_ip)
            else:
                dnsmasq_content += generate_dns(chunk, current_ip)

    if test:
        if not dnat:
            dnsmasq_content += generate_dns('ptest.verdandi.is', public_ip)
            dnsmasq_content += generate_dns('ptest2.verdandi.is', public_ip)
        else:
            dnsmasq_content += generate_dns('ptest.verdandi.is', current_ip)
            dnsmasq_content += generate_dns('ptest2.verdandi.is', current_ip)

    if dnat:
        for group in config["groups"].values():
            for proxy in group["proxies"]:
                if not proxy["dnat"]:
                    current_ip = long2ip(ip2long(current_ip) + 1)
                    dnsmasq_content += generate_dns(proxy["domain"], current_ip)

    return dnsmasq_content
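
Note: every example on this page calls a project-local chunks helper rather than anything from the standard library, and the exact signature differs between projects (some take only a sequence and a size, others accept extra flags, and some return a list that is indexed directly). Purely as a point of reference, a minimal sketch of the size-based form most of these call sites appear to assume:

def chunks(seq, size):
    # Split seq into successive pieces of at most `size` items; the last piece may be shorter.
    return [seq[start:start + size] for start in range(0, len(seq), size)]

With this sketch, chunks([1, 2, 3, 4, 5], 2) gives [[1, 2], [3, 4], [5]].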
Example #2
    def train(self, X_train, X_val):

        train_true = filter(lambda x: x[2]==1, X_train)
        train_false = filter(lambda x: x[2]==0, X_train)

        val_true = filter(lambda x: x[2]==1, X_val)
        val_false = filter(lambda x: x[2]==0, X_val)

        n_train_true = len(train_true)
        n_val_true = len(val_true)

        make_epoch_helper = functools.partial(make_epoch, train_true=train_true, train_false=train_false, val_true=val_true, val_false=val_false)

        logging.info("Starting training...")
        epoch_iterator = ParallelBatchIterator(make_epoch_helper, range(P.N_EPOCHS), ordered=False, batch_size=1, multiprocess=False, n_producers=1)

        for epoch_values in epoch_iterator:
            self.pre_epoch()
            train_epoch_data, val_epoch_data = epoch_values

            train_epoch_data = util.chunks(train_epoch_data, P.BATCH_SIZE_TRAIN)
            val_epoch_data = util.chunks(val_epoch_data, P.BATCH_SIZE_VALIDATION)

            self.do_batches(self.train_fn, train_epoch_data, self.train_metrics)
            self.do_batches(self.val_fn, val_epoch_data, self.val_metrics)

            self.post_epoch()
            logging.info("Setting learning rate to {}".format(P.LEARNING_RATE  * ((0.985)**self.epoch)))
            self.l_r.set_value(P.LEARNING_RATE  * ((0.985)**self.epoch))
Example #3
 def __call__(self, message, state=None, *, pad=True):
     state = state or self.initial_state
     prepared_message = message + (self.padding(len(message)) if pad else b"")
     assert len(prepared_message) % self.block_size == 0
     for block in chunks(prepared_message, self.block_size):
         state = self.compress(state, block)
     return state
Example #4
 def getstatusforfids(self, fids):
     status = {}
     
     for chunk in chunks(fids, 50):
         for f in arlalow.fetchbulkstatus(self.fsconn, chunk):
             status[f["fid"]] = f["status"]
     return status
Example #5
def extract_all_labels(filenames, out_filepath=DATA_FOLDER+'labels.p', chunk_size=2000):
    print "EXTRACTING ALL LABELS INTO {0}".format(out_filepath)
    all_labels = []
    label_dict = {}

    filenames_chunks = util.chunks(filenames, chunk_size)

    for i, chunk in enumerate(filenames_chunks):
        pool = Pool(processes=util.CPU_COUNT)
        chunk_labels = pool.map(extract_labels, chunk)
        pool.close()

        for filepath, labels in zip(chunk, chunk_labels):
            if labels is not None:
                file_id = util.filename_without_extension(filepath)
                label_dict[file_id] = labels
                all_labels += labels

        print i+1, '/', len(filenames_chunks)

    #Write labels to file
    with open(out_filepath,'w') as f:
        pickle.dump(label_dict, f)

    print '\nLabels:'
    print len(set(all_labels))
    print Counter(all_labels)
Example #6
    def predict(self, data, modes):
        """predict whether a list of position follows atrain route by detecting
        the nearest train stops. Input is the pandas data frame of
        measurements and an array of current mode predictions.  Returns
        an array of predicted modes of the same size as the input data
        frame has rows.

        """
        # extract lat/lon from data frame
        lat = data['WLATITUDE'].values
        lon = data['WLONGITUDE'].values

        # chunk is a tuple (start_idx, end_idx, mode)
        for start_idx, end_idx, _ in ifilter(lambda chunk: chunk[2] in [MODE_CAR, MODE_BUS, MODE_TRAIN],
                                             chunks(modes, include_values=True)):
            # test for distance first
            lat_seg = lat[start_idx:end_idx]
            lon_seg = lon[start_idx:end_idx]
            valid_lat_seg = lat_seg[np.where(np.invert(np.isnan(lat_seg)))[0]]
            valid_lon_seg = lon_seg[np.where(np.invert(np.isnan(lon_seg)))[0]]

            if len(valid_lon_seg) == 0:
                continue
            # TODO: parameters have to be tuned carefully
            is_train = predict_mode_by_location(valid_lat_seg,
                                                valid_lon_seg,
                                                self.train_location_tree,
                                                self.train_location_dict,
                                                self.train_route_dict,
                                                dist_thre = 400,
                                                dist_pass_thres = 7, 
                                                num_stops_thre = 3,
                                                dist_pass_thres_perc = 0.7)

            #check entry point distance
            entry_pt_near = -1
            exit_pt_near = -1

            if start_idx-1>=0:
                if not np.isnan(lat[start_idx-1]):
                    nearest_station = find_nearest_station(lat[start_idx-1], lon[start_idx-1], self.train_location_tree, self.dist_thres_entry_exit)
                    if len(nearest_station)!=0:
                        entry_pt_near = 1
                    else:
                        entry_pt_near = 0

            if end_idx < len(modes):
                if not np.isnan(lat[end_idx]):
                    nearest_station = find_nearest_station(lat[end_idx],lon[end_idx],
                                                           self.train_location_tree,
                                                           self.dist_thres_entry_exit)
                    if len(nearest_station)!=0:
                        exit_pt_near = 1
                    else:
                        exit_pt_near = 0
            if is_train or entry_pt_near + exit_pt_near == 2:
                modes[start_idx:end_idx] = MODE_TRAIN
            else:
                modes[start_idx:end_idx] = MODE_CAR
        return modes
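
The chunks used in this predict method clearly has a different contract from the simple size-based splitter sketched earlier on this page: per the comment at the call site, chunks(modes, include_values=True) yields (start_idx, end_idx, mode) runs of consecutive equal values. A hedged sketch of that run-segmentation variant, assuming only what the call site shows:

def chunks(values, include_values=False):
    # Yield (start, end) index pairs for runs of equal consecutive values;
    # with include_values=True, also include the run's value in the tuple.
    start = 0
    for i in range(1, len(values) + 1):
        if i == len(values) or values[i] != values[start]:
            yield (start, i, values[start]) if include_values else (start, i)
            start = i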
Example #7
def collect_tweets_by_ids(tweet_ids_config_filepath, output_folder, config):

    apikeys = list(config['apikeys'].values()).pop()

    tweet_ids_config = {}
    with open(os.path.abspath(tweet_ids_config_filepath), 'r') as tweet_ids_config_rf:
        tweet_ids_config = json.load(tweet_ids_config_rf)

    max_range = 100
    
    current_ix = tweet_ids_config['current_ix'] if ('current_ix' in tweet_ids_config) else 0
    total = len(tweet_ids_config['tweet_ids'][current_ix:])
    tweet_id_chunks = util.chunks(tweet_ids_config['tweet_ids'][current_ix:], max_range)

    for tweet_ids in tweet_id_chunks:
        try:
            twitterCrawler = TwitterCrawler(apikeys=apikeys, client_args=CLIENT_ARGS, output_folder=output_folder)
            twitterCrawler.lookup_tweets_by_ids(tweet_ids)
            current_ix += len(tweet_ids)

        except Exception as exc:
            logger.error(exc)
            logger.error(util.full_stack())
            # Ctrl+C is not handled here; on restart, processing resumes from the last
            # saved chunk, so a few duplicate tweets may be collected. That is acceptable.
            pass

        tweet_ids_config['current_ix'] = current_ix
        
        flash_cmd_config(tweet_ids_config, tweet_ids_config_filepath, output_folder)

        logger.info('COMPLETED -> (current_ix: [%d/%d])'%(current_ix, total))
        logger.info('PAUSE %ds to CONTINUE...'%WAIT_TIME)
        time.sleep(WAIT_TIME)
    else:
        logger.info('[tweets_by_ids] ALL COMPLETED')
Example #8
	def decode(self, server, block_header, target, job_id = None, extranonce2 = None):
		if block_header:
			job = Object()
	
			binary_data = block_header.decode('hex')
			data0 = np.zeros(64, np.uint32)
			data0 = np.insert(data0, [0] * 16, unpack('IIIIIIIIIIIIIIII', binary_data[:64]))
	
			job.target	  = np.array(unpack('IIIIIIII', target.decode('hex')), dtype=np.uint32)
			job.header	  = binary_data[:68]
			job.merkle_end  = np.uint32(unpack('I', binary_data[64:68])[0])
			job.time		= np.uint32(unpack('I', binary_data[68:72])[0])
			job.difficulty  = np.uint32(unpack('I', binary_data[72:76])[0])
			job.state	   = sha256(STATE, data0)
			job.f		   = np.zeros(8, np.uint32)
			job.state2	  = partial(job.state, job.merkle_end, job.time, job.difficulty, job.f)
			job.targetQ	 = 2**256 / int(''.join(list(chunks(target, 2))[::-1]), 16)
			job.job_id	  = job_id
			job.extranonce2 = extranonce2
			job.server	  = server
	
			calculateF(job.state, job.merkle_end, job.time, job.difficulty, job.f, job.state2)

			if job.difficulty != self.difficulty:
				self.set_difficulty(job.difficulty)
	
			return job
Example #9
    def decode(self, server, block_header, target, job_id = None, extranonce2 = None):
        if block_header:
            job = Object()

            binary_data = block_header.decode('hex')

            #data0 = list(unpack('<16I', binary_data[:64])) + ([0] * 48)

            job.headerX = binary_data[:76]
            job.dataX = unpack('<19I', job.headerX)
            job.target		= unpack('<8I', target.decode('hex'))
            job.header		= binary_data[:68]
            job.merkle_end	= uint32(unpack('<I', binary_data[64:68])[0])
            job.time		= uint32(unpack('<I', binary_data[68:72])[0])
            job.difficulty	= uint32(unpack('<I', binary_data[72:76])[0])
            # job.state		= sha256(STATE, data0)
            job.targetQ		= 2**256 / int(''.join(list(chunks(target, 2))[::-1]), 16)
            job.job_id		= job_id
            job.extranonce2	= extranonce2
            job.server		= server

            if job.difficulty != self.difficulty:
                self.set_difficulty(job.difficulty)

            return job
Example #10
def crack_ecb_oracle(oracle_fn, prefix_length=0):
    block_size = guess_block_size(oracle_fn)
    if not looks_like_ecb(oracle_fn(b"A" * 100), block_size):
        raise ValueError("oracle_fn does not appear to produce ECB mode output")
    result = bytearray()
    while True:
        short_block_length = (block_size - len(result) - 1 - prefix_length) % block_size
        short_input_block = b"A" * short_block_length
        block_index = (len(result) + prefix_length) // block_size
        block_to_look_for = chunks(oracle_fn(short_input_block))[block_index]
        for guess in all_bytes_by_frequency:
            test_input = short_input_block + result + bytes([guess])
            if chunks(oracle_fn(test_input))[block_index] == block_to_look_for:
                result.append(guess)
                break
        else:  # if no byte matches
            return pkcs7_unpad(result)
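
crack_ecb_oracle above is the classic byte-at-a-time ECB recovery: it pads the attacker-controlled input so each unknown byte falls at the end of a block, then brute-forces that byte by comparing ciphertext blocks. Purely as an illustration of how it might be exercised (the key, secret suffix, and ecb_oracle below are invented for this sketch; guess_block_size, looks_like_ecb, all_bytes_by_frequency, pkcs7_unpad, and chunks are assumed to come from the same codebase), using the cryptography package:

import os
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

_KEY = os.urandom(16)            # hypothetical fixed key
_SECRET = b"attack at dawn"      # hypothetical suffix the attacker wants to recover

def ecb_oracle(attacker_input: bytes) -> bytes:
    # PKCS#7-pad attacker input plus the secret suffix, then encrypt with AES-128-ECB.
    padder = padding.PKCS7(128).padder()
    data = padder.update(attacker_input + _SECRET) + padder.finalize()
    enc = Cipher(algorithms.AES(_KEY), modes.ECB()).encryptor()
    return enc.update(data) + enc.finalize()

recovered = crack_ecb_oracle(ecb_oracle)   # expected to equal _SECRET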
Example #11
 def add_text(self, text):
     if len(text) + len(self._lines[self.point[0]]) > self.draw_width:
         self.point_to_next_line()
     if len(text) > self.draw_width:
         lines_to_add = chunks(text, self.draw_width)
         lines_to_advance = len(lines_to_add)
         for line in lines_to_add:
             self._lines.append(line)
         self.adjust_point_by_lines(lines_to_advance)
     else:
         self._lines[self.point[0]] += text
     self.point_to_end_of_line()
Example #12
def cluster_to_kml(user, cluster, cluster_id):
    """
    Creates a single, or possibly multiple, KML files for a given cluster.
    A KML file is limited by MyMaps to having only 10 layers, so only 
    10 sections will be in a given KML file.

    Responsibility of the caller to check the existence and formatting of the cluster.
    """ 
    Sections = get_section_db()
    for i,chunk in enumerate(chunks(cluster,10)):    
        sections = map(lambda section_id: Sections.find_one({'_id':section_id}), chunk)
        sections_to_kml("%s_cluster_data_kml/CLUSTER_%s_%i" % (user, str(cluster_id), i), sections)
Example #13
 def cross_validation(self, fold, epoch):
     print 'doing cross validation...'
     splited_data = list(chunks(self.data, fold))
     hyper_test = defaultdict(int)
     for idx, (train, test) in enumerate(splited_data):
         for c in self.C:
             for rho_0 in self.RHO_0:
                 weight = self.train(train, rho_0, c, epoch=epoch)
                 precision = self.test(test, weight)
                 print 'done fold %i' % idx, ' on [rho_0: %s, c: %s]' \
                       % (rho_0, c)
                 hyper_test[(rho_0, c)] += precision
     return map(lambda (x, y): (x, y/fold), hyper_test.iteritems())
Example #14
	def start_producers(self, result_queue):
		jobs = Queue()
		n_workers = params.N_PRODUCERS
		batch_count = 0

		#Flag used for keeping values in queue in order
		last_queued_job = Value('i', -1)

		for job_index, batch in enumerate(util.chunks(self.X,self.batch_size)):
			batch_count += 1
			jobs.put( (job_index,batch) )

		# Define producer (putting items into queue)
		def produce(id):
			while True:
				job_index, task = jobs.get()

				if task is None:
					#print id, " fully done!"
					break

				result = self.gen(task)

				while(True):
					#My turn to add job done
					if last_queued_job.value == job_index-1:

						with last_queued_job.get_lock():
							result_queue.put(result)
							last_queued_job.value += 1
							#print id, " worker PUT", job_index
							break

		#Start workers
		for i in xrange(n_workers):

			if params.MULTIPROCESS:
				p = Process(target=produce, args=(i,))
			else:
				p = Thread(target=produce, args=(i,))

			p.daemon = True
			p.start()

		#Add poison pills to queue (to signal workers to stop)
		for i in xrange(n_workers):
			jobs.put((-1,None))


		return batch_count, jobs
Example #15
def profile(subset=1000, multi=True, n_threads = 4, batch_size=64, thread_pool=False):

    # Load a bunch of imagenames
    y = util.load_labels()
    y = y[:subset]
    keys = y.index.values

    #Create sublists (batches)
    batched_keys = util.chunks(keys, batch_size)

    if multi:
        augment_multithreaded(batched_keys, n_threads=n_threads, thread_pool=thread_pool)
    else:
        augment_singlethreaded(batched_keys)
Example #16
def threshold_optimization(p, y):
    print "Optimizing threshold"
    y_images = util.chunks(y, 384*512)

    def dice_objective(threshold):
        p_binary = np.where(p > threshold, 1,0)
        p_images_binary = util.chunks(p_binary, 384*512)

        mean, std, dices = dice(p_images_binary, y_images)
        return -mean

    x, v, message = scipy.optimize.fmin_l_bfgs_b(dice_objective, 0.5, approx_grad=True, bounds=[(0, 1)], epsilon=1e-03)
    print "Optimized, threshold {0}, ? {1}, termination because {2}".format(x,v,message)
    return x[0]
Example #17
	def refresh_job(self, j):
		j.extranonce2 = self.increment_nonce(j.extranonce2)
		coinbase = j.coinbase1 + self.extranonce + j.extranonce2 + j.coinbase2
		merkle_root = sha256(sha256(unhexlify(coinbase)).digest()).digest()

		for hash_ in j.merkle_branch:
			merkle_root = sha256(sha256(merkle_root + unhexlify(hash_)).digest()).digest()
		merkle_root_reversed = ''
		for word in chunks(merkle_root, 4):
			merkle_root_reversed += word[::-1]
		merkle_root = hexlify(merkle_root_reversed)

		j.block_header = ''.join([j.version, j.prevhash, merkle_root, j.ntime, j.nbits])
		j.time = time()
		return j
Example #18
    def call(self, orderlist):
        assert isinstance(orderlist, list)
        orders = {}
        MAXORDERS = 50
        for ol in util.chunks(orderlist, MAXORDERS):        
            # make BDAQ representation of orders from the orderlist passed in
            self.req.Orders.Order = self.makeorderlist(ol)
            apilog.info('calling BDAQ Api PlaceOrdersNoReceipt')
            result = self.client.service.PlaceOrdersNoReceipt(self.req)
            ors = apiparse.ParsePlaceOrdersNoReceipt(result, orderlist)
            orders.update(ors)

        # note: could put result.Timestamp in order object so that we
        # are saving the BDAQ time.
        return orders
Example #19
def status_iter(iterable, callback, chunksize=1, reportsize=10):
    itersize = len(iterable)
    starttime = time.time()
    for i, item in enumerate(util.chunks(iterable, chunksize), 1):
        callback(item)
        if i % reportsize == 0:
            done = i * chunksize
            nowtime = time.time()
            numblocks = itersize * 1.0 / (reportsize*chunksize)
            curblock = done / (reportsize*chunksize)
            position = curblock / numblocks
            duration = round(nowtime - starttime)
            durdelta = datetime.timedelta(seconds=duration)
            remaining = round((duration / position) - duration)
            remdelta = datetime.timedelta(seconds=remaining)
            lookuplog.info("Done %s/%s in %s; %s remaining", done, itersize, str(durdelta), str(remdelta))
    lookuplog.info("Finished")
Example #20
def nfold_cross_validate(data, n=4):
    data_chunks = chunks(data, len(data) / n)

    rmse_values = []
    for i in range(n):
        train_set = flatten(data_chunks[:i] + data_chunks[i + 1:])
        test_set = data_chunks[i]
        classif = nltk.MaxentClassifier.train(train_set)   
        
        test_fs, test_ratings = zip(*test_set)
        results = classif.batch_classify(test_fs)
        set_rmse = rmse(test_ratings, results)
        print 'RMSE: ', set_rmse

        rmse_values.append(set_rmse)
    
    print 'Average RMSE:', sum(rmse_values) / float(len(rmse_values))
Example #21
    def submit_events(self, events):
        headers = {"Content-Type": "application/json"}
        event_chunk_size = self.event_chunk_size

        for chunk in chunks(events, event_chunk_size):
            payload = {
                "apiKey": self.api_key,
                "events": {"api": chunk},
                "uuid": get_uuid(),
                "internalHostname": get_hostname(),
            }
            params = {}
            if self.api_key:
                params["api_key"] = self.api_key
            url = "%s/intake?%s" % (self.api_host, urlencode(params))

            self.submit_http(url, json.dumps(payload), headers)
Example #22
    def submit_events(self, events):
        headers = {'Content-Type':'application/json'}
        event_chunk_size = self.event_chunk_size

        for chunk in chunks(events, event_chunk_size):
            payload = {
                'apiKey': self.api_key,
                'events': {
                    'api': chunk
                },
                'uuid': get_uuid(),
                'internalHostname': get_hostname()
            }
            params = {}
            if self.api_key:
                params['api_key'] = self.api_key
            url = '%s/intake?%s' % (self.api_host, urlencode(params))

            self.submit_http(url, json.dumps(payload), headers)
Example #23
	def _start_producers(self, result_queue):
		jobs = Queue()
		n_workers = self.n_producers
		batch_count = 0

		# Flag used for keeping values in queue in order
		last_queued_job = Value('i', -1)

		chunks = util.chunks(self.X,self.batch_size)


		# Add jobs to queue
		for job_index, X_batch in enumerate(chunks):
			batch_count += 1
			jobs.put( (job_index,X_batch) )

		# Add poison pills to queue (to signal workers to stop)
		for i in xrange(n_workers):
			jobs.put((-1,None))

		# Define producer function
		produce = partial(_produce_helper,
			generator=self.generator,
			jobs=jobs,
			result_queue=result_queue,
			last_queued_job=last_queued_job,
			ordered=self.ordered)

		# Start worker processes or threads
		for i in xrange(n_workers):
			name = "ParallelBatchIterator worker {0}".format(i)

			if self.multiprocess:
				p = Process(target=produce, args=(i,), name=name)
			else:
				p = Thread(target=produce, args=(i,), name=name)

			# Make the process daemon, so the main process can die without these finishing
			#p.daemon = True
			p.start()

		return batch_count, jobs
Example #24
    def call(self, mids):
        """
        Return all selections for Market ids in mids, where mids is a
        list of market ids.
        """

        allselections = []
        # split up mids into groups of size MAXMIDS
        for (callnum, ids) in \
            enumerate(util.chunks(mids, ApiGetPrices.MAXMIDS)):
            self.req.MarketIds = ids
            if callnum > 0:
                # sleep for some time before calling Api again
                time.sleep(self.throttl)
                
            apilog.info('calling BDAQ Api GetPrices')        
            result = self.client.service.GetPrices(self.req)
            selections =  apiparse.ParseGetPrices(ids, result)
            allselections = allselections + selections

        return allselections
Example #25
def threshold_optimization_naive(p,y):
    print "Optimizing threshold"
    y_images = util.chunks(y, 384*512)

    candidates = np.arange(0.25, 0.75, 1.0 / 2500)  # float step; 1/2500 is 0 under Python 2 integer division

    def dice_objective(threshold):
        p_binary = np.where(p > threshold, 1,0)
        p_images_binary = util.chunks(p_binary, 384*512)

        mean, std, dices = dice(p_images_binary, y_images)
        return mean

    #score = map(dice_objective,tqdm(candidates))
    scores = []
    for t in tqdm(candidates):
        score = dice_objective(t)
        scores.append(score)
    print np.argmax(scores)
    threshold = candidates[np.argmax(scores)]
    print "Best threshold ", threshold
    return threshold
Example #26
def lookup():
    """ returns (done, remaining)"""
    songs = db.data.get_pending_songs()
    songcount = db.data.get_count_pending_songs()

    if not songs:
        return (0, 0)

    # We can use a with statement to ensure threads are cleaned up promptly
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Start the load operations and mark each future with its URL
        i = 0
        future_to_song = {}
        for songchunk in util.chunks(songs, 10):
            future_to_song[executor.submit(query, songchunk, i)] = songchunk
            i = 1 - i

        for future in concurrent.futures.as_completed(future_to_song):
            songchunk = future_to_song[future]
            # For each set of songs, get them from the response
            # for songs not in the response, add an empty response
            try:
                data = future.result()
            except Exception as exc:
                print('%r generated an exception: %s' % (songchunk, exc))
            else:
                gotsongs = set()
                waitings = set(songchunk)
                results = data["response"].get("songs", [])
                for s in results:
                    songid = s["id"]
                    gotsongs.add(songid)
                    response = {"response": {"songs": [s], "status": data["response"]["status"]}}
                    db.data.add_response_if_not_exists(echonest.SONG_PROFILE, songid, response)
                nosongs = waitings-gotsongs
                for s in list(nosongs):
                    db.data.add_response_if_not_exists(echonest.SONG_PROFILE, s, {})

    return (len(songs), songcount-len(songs))
Example #27
def filter_and_lemma(chunk_size=2000):
    files = glob.glob(INPUT_FOLDER+'*.frog.out')

    lemmatized = {}

    #Split all files in the list into chunks
    file_chunks = util.chunks(files, chunk_size)

    for i, chunk in enumerate(tqdm(file_chunks)):
        pool = Pool(processes=util.CPU_COUNT)
        filtered_lemmatized = pool.map(process, chunk)
        pool.close()

        for filename, value in zip(chunk, filtered_lemmatized):
            file_id = util.filename_without_extension(filename, '.frog.out')
            lemmatized[file_id] = value

    #Order by key
    ordered = OrderedDict(sorted(lemmatized.items()))

    with open(DATA_FOLDER+'processed.p','w') as f:
        pickle.dump(ordered,f)
    print "Done!"
Example #28
    def submit_events(self, events):
        headers = {'Content-Type':'application/json'}
        method = 'POST'

        events_len = len(events)
        event_chunk_size = self.event_chunk_size

        for chunk in chunks(events, event_chunk_size):
            payload = {
                'apiKey': self.api_key,
                'events': {
                    'api': chunk
                },
                'uuid': get_uuid(),
                'internalHostname': get_hostname()
            }
            params = {}
            if self.api_key:
                params['api_key'] = self.api_key
            url = '/intake?%s' % urlencode(params)

            status = None
            conn = self.http_conn_cls(self.api_host)
            try:
                start_time = time()
                conn.request(method, url, json.dumps(payload), headers)

                response = conn.getresponse()
                status = response.status
                response.close()
                duration = round((time() - start_time) * 1000.0, 4)
                log.debug("%s %s %s%s (%sms)" % (
                                status, method, self.api_host, url, duration))

            finally:
                conn.close()
Example #29
def getActivations(x_train, numActivationTrainingInstances, model, dnnModel,
                   y_train):
    util.thisLogger.logInfo(
        "------ start of activation data extraction for training data -------")
    startTime = datetime.datetime.now()

    # Only get activations from the instances that are correctly classified
    y_predict = np.argmax(dnnModel.predict(x_train), axis=1)

    # The DNN is trained to output 0 or 1 only.
    # get the original classes it was trained on and transform the outputs
    classes = util.getParameter('DataClasses')
    classes = np.asarray(classes.replace('[', '').replace(
        ']', '').split(',')).astype(int)
    util.thisLogger.logInfo('Data classes to be used: %s' % (classes))
    count = 0
    for c in classes:
        y_predict = np.where(y_predict == count, c, y_predict)
        count += 1

    incorrectPredictIndexes = []
    for i in range(0, len(y_predict) - 1):
        if (y_predict[i] != y_train[i]):
            incorrectPredictIndexes.append(i)

    x_train = np.delete(x_train, incorrectPredictIndexes, axis=0)
    y_train = np.delete(y_train, incorrectPredictIndexes, axis=0)
    y_predict = np.delete(y_predict, incorrectPredictIndexes, axis=0)

    # train in batches
    activationTrainingBatchSize = util.getParameter(
        'ActivationTrainingBatchSize')

    if numActivationTrainingInstances == -1:
        numActivationTrainingInstances = len(x_train)

    xData = x_train[:numActivationTrainingInstances, ]
    batchData = list(util.chunks(xData, activationTrainingBatchSize))

    activationData = []
    numBatches = len(batchData)
    batchActivationData = [[] for i in range(numBatches)]
    for batchIndex in range(numBatches):
        batch = batchData[batchIndex]
        util.thisLogger.logInfo("Training batch " + str(batchIndex + 1) +
                                " of " + str(len(batchData)) + " (" +
                                str(len(batch)) + " instances)")
        # Get activations and set up streams for the training data
        # get reduced activations for all training data in one go

        # Train in a loop
        util.thisLogger.logInfo(
            str(len(batch)) + " instances selected from training data")

        activations, numLayers = extract.getActivationData(model, batch)
        batchActivationData[batchIndex].append(activations)
        activationData.append(activations)

        util.thisLogger.logInfo(
            "Filter Layers: DNN has %s activation layers, getting activation data for %s instances."
            % (numLayers, len(batch)))

    endTime = datetime.datetime.now()
    util.thisLogger.logInfo('Total training time: ' + str(endTime - startTime))
    util.thisLogger.logInfo(
        "------- end of activation data extraction for training data --------")
    util.thisLogger.logInfo("")

    return numLayers, batchData, activationData, batchActivationData
Example #30
def NARR_to_EPIC(vals):
    lat,lon = vals
    # Output pandas frame into EPIC weather file
    out_fl   = constants.epic_dly+os.sep+str(lat)+'_'+str(lon)+'.txt'

    if not(os.path.isfile(out_fl)):
        logging.info(out_fl) 
        # List all years for which we will create EPIC file
        lst_yrs     = rrule(YEARLY, dtstart=constants.strt_date, until=constants.end_date)

        # Create pandas data frame, fill with 0.0s, for 1st year.
        epic_df = pandas.DataFrame(index=pandas.date_range(constants.strt_date,constants.end_date),\
                                   columns=[constants.vars_to_get.keys()])
        epic_out = open(out_fl,'w')

        # Loop across years
        for idx_yr in range(lst_yrs.count()):		
            cur_strt_date  = datetime.date(lst_yrs[idx_yr].year,1,1)
            cur_end_date   = datetime.date(lst_yrs[idx_yr].year,12,31)
            cur_date_range = pandas.date_range(cur_strt_date,cur_end_date)

            tmp_df         = pandas.DataFrame(index=cur_date_range,columns=[constants.vars_to_get.keys()])
            tmp_df.fillna(0.0,inplace=True)
            # Loop across variables
            for cur_var in constants.vars_to_get.keys():
                e_fl      = open(constants.data_dir + os.sep + 'Data' + os.sep + cur_var + os.sep + str(lst_yrs[idx_yr].year)+\
                                      os.sep + str(lat) + '_' + str(lon) + '.txt')
                epic_vars = filter(None,e_fl.readlines()[0].strip().split("'"))

                if cur_var == 'air.2m':
                    epic_min_tmp     = util.chunks(epic_vars,8,True)
                    epic_max_tmp     = util.chunks(epic_vars,8,False)

                    tmp_df[cur_var] = pandas.Series(epic_min_tmp,index=cur_date_range)
                    tmp_df[cur_var] = tmp_df[cur_var].map(lambda x:float(x)+constants.K_To_C)

                    tmp_df['tmax']  = pandas.Series(epic_max_tmp,index=cur_date_range)
                    tmp_df['tmax']  = tmp_df['tmax'].map(lambda x:float(x)+constants.K_To_C)
                    tmp_df['tmin']  = tmp_df['air.2m'] 
                else:
                    tmp_df[cur_var] = pandas.Series(epic_vars,index=cur_date_range)
                    tmp_df[cur_var] = tmp_df[cur_var].map(lambda x:float(x))
        
            # Get into right units
            tmp_df['wnd']      = pandas.Series(tmp_df['uwnd.10m'].astype(float)**2.0+\
                                                tmp_df['vwnd.10m'].astype(float)**2.0,index=tmp_df.index)
            tmp_df['wnd']      = tmp_df['wnd']**0.5
            tmp_df['rhum.2m']  = tmp_df['rhum.2m'].map(lambda x:float(x)/100.0)
            tmp_df['swr_diff'] = pandas.Series(tmp_df['dswrf']-tmp_df['uswrf.sfc'],index=tmp_df.index)
            tmp_df['srad']     = tmp_df['swr_diff'].map(lambda x:constants.WMsq_MjMsq*x)
            tmp_df['year']     = tmp_df.index.year
            tmp_df['month']    = tmp_df.index.month
            tmp_df['day']      = tmp_df.index.day
            epic_df            = epic_df.combine_first(tmp_df)
        # Output dataframe to text file with right formatting
        for index, row in epic_df.iterrows():
            epic_out.write(('%6d%4d%4d'+6*'%6.2f'+'\n') %
                        (row['year'],row['month'],row['day'],
                         row['srad'],row['tmax'],row['tmin'],
                         row['apcp'],row['rhum.2m'],row['wnd']))
        epic_out.close()
    else:
        logging.info('File exists: '+out_fl) 
Example #31
def main(args_list: List[str]) -> None:
    parser = argparse.ArgumentParser(description="Autoencoder for coq terms")
    add_std_args(parser)
    parser.add_argument("--gamma", default=.9, type=float)
    parser.add_argument("--epoch-step", default=5, type=int)
    parser.add_argument("--num-decoder-layers",
                        dest="num_decoder_layers",
                        default=3,
                        type=int)
    args = parser.parse_args(args_list)
    curtime = time.time()
    print("Loading data...", end="")
    sys.stdout.flush()
    dataset = list(
        itertools.islice(read_text_data(args.scrape_file), args.max_tuples))

    print(" {:.2f}s".format(time.time() - curtime))
    curtime = time.time()
    print("Extracting terms...", end="")
    sys.stdout.flush()
    term_strings = list(
        chain.from_iterable(
            [[hyp.split(":")[1].strip()
              for hyp in datum.context.focused_hyps] +
             [datum.context.focused_goal] for datum in dataset]))
    print(" {:.2f}s".format(time.time() - curtime))

    curtime = time.time()
    print("Building tokenizer...", end="")
    sys.stdout.flush()
    tokenizer = tk.make_keyword_tokenizer_topk(term_strings,
                                               tk.tokenizers[args.tokenizer],
                                               args.num_keywords, 2)
    print(" {:.2f}s".format(time.time() - curtime))
    curtime = time.time()
    print("Tokenizing {} strings...".format(len(term_strings)), end="")
    sys.stdout.flush()

    with multiprocessing.Pool(None) as pool:
        tokenized_data_chunks = pool.imap_unordered(
            functools.partial(use_tokenizer, tokenizer, args.max_length),
            chunks(term_strings, 32768))
        tokenized_data = list(chain.from_iterable(tokenized_data_chunks))

    print(" {:.2f}s".format(time.time() - curtime))
    checkpoints = train(tokenized_data, tokenizer.numTokens(), args.max_length,
                        args.hidden_size, args.learning_rate, args.epoch_step,
                        args.gamma, args.num_encoder_layers,
                        args.num_decoder_layers, args.num_epochs,
                        args.batch_size, args.print_every,
                        optimizers[args.optimizer])
    for epoch, (encoder_state, decoder_state,
                training_loss) in enumerate(checkpoints):
        state = {
            'epoch': epoch,
            'training-loss': training_loss,
            'tokenizer': tokenizer,
            'tokenizer-name': args.tokenizer,
            'optimizer': args.optimizer,
            'learning-rate': args.learning_rate,
            'encoder': encoder_state,
            'decoder': decoder_state,
            'num-encoder-layers': args.num_encoder_layers,
            'num-decoder-layers': args.num_decoder_layers,
            'max-length': args.max_length,
            'hidden-size': args.hidden_size,
            'num-keywords': args.num_keywords,
            'context-filter': args.context_filter,
        }
        with open(args.save_file, 'wb') as f:
            print("=> Saving checkpoint at epoch {}".format(epoch))
            torch.save(state, f)
    pass
Example #32
 def expect_layout(self, layout):
     compressed = lzma.compress(layout.encode("utf-8"))
     self.expect("FE01", struct.pack("<I", len(compressed)))
     for idx, chunk in enumerate(chunks(compressed, 32)):
         self.expect(struct.pack("<BBI", 0xFE, 0x02, idx), chunk)
Example #33
def NARR_to_EPIC(vals):
    lat, lon = vals
    # Output pandas frame into EPIC weather file
    out_fl = constants.epic_dly + os.sep + str(lat) + '_' + str(lon) + '.txt'

    if not (os.path.isfile(out_fl)):
        logging.info(out_fl)
        # List all years for which we will create EPIC file
        lst_yrs = rrule(YEARLY,
                        dtstart=constants.strt_date,
                        until=constants.end_date)

        # Create pandas data frame, fill with 0.0s, for 1st year.
        epic_df = pandas.DataFrame(index=pandas.date_range(constants.strt_date,constants.end_date),\
                                   columns=[constants.vars_to_get.keys()])
        epic_out = open(out_fl, 'w')

        # Loop across years
        for idx_yr in range(lst_yrs.count()):
            cur_strt_date = datetime.date(lst_yrs[idx_yr].year, 1, 1)
            cur_end_date = datetime.date(lst_yrs[idx_yr].year, 12, 31)
            cur_date_range = pandas.date_range(cur_strt_date, cur_end_date)

            tmp_df = pandas.DataFrame(index=cur_date_range,
                                      columns=[constants.vars_to_get.keys()])
            tmp_df.fillna(0.0, inplace=True)
            # Loop across variables
            for cur_var in constants.vars_to_get.keys():
                e_fl      = open(constants.data_dir + os.sep + 'Data' + os.sep + cur_var + os.sep + str(lst_yrs[idx_yr].year)+\
                                      os.sep + str(lat) + '_' + str(lon) + '.txt')
                epic_vars = filter(None,
                                   e_fl.readlines()[0].strip().split("'"))

                if cur_var == 'air.2m':
                    epic_min_tmp = util.chunks(epic_vars, 8, True)
                    epic_max_tmp = util.chunks(epic_vars, 8, False)

                    tmp_df[cur_var] = pandas.Series(epic_min_tmp,
                                                    index=cur_date_range)
                    tmp_df[cur_var] = tmp_df[cur_var].map(
                        lambda x: float(x) + constants.K_To_C)

                    tmp_df['tmax'] = pandas.Series(epic_max_tmp,
                                                   index=cur_date_range)
                    tmp_df['tmax'] = tmp_df['tmax'].map(
                        lambda x: float(x) + constants.K_To_C)
                    tmp_df['tmin'] = tmp_df['air.2m']
                else:
                    tmp_df[cur_var] = pandas.Series(epic_vars,
                                                    index=cur_date_range)
                    tmp_df[cur_var] = tmp_df[cur_var].map(lambda x: float(x))

            # Get into right units
            tmp_df['wnd']      = pandas.Series(tmp_df['uwnd.10m'].astype(float)**2.0+\
                                                tmp_df['vwnd.10m'].astype(float)**2.0,index=tmp_df.index)
            tmp_df['wnd'] = tmp_df['wnd']**0.5
            tmp_df['rhum.2m'] = tmp_df['rhum.2m'].map(
                lambda x: float(x) / 100.0)
            tmp_df['swr_diff'] = pandas.Series(tmp_df['dswrf'] -
                                               tmp_df['uswrf.sfc'],
                                               index=tmp_df.index)
            tmp_df['srad'] = tmp_df['swr_diff'].map(
                lambda x: constants.WMsq_MjMsq * x)
            tmp_df['year'] = tmp_df.index.year
            tmp_df['month'] = tmp_df.index.month
            tmp_df['day'] = tmp_df.index.day
            epic_df = epic_df.combine_first(tmp_df)
        # Output dataframe to text file with right formatting
        for index, row in epic_df.iterrows():
            epic_out.write(('%6d%4d%4d' + 6 * '%6.2f' + '\n') %
                           (row['year'], row['month'], row['day'], row['srad'],
                            row['tmax'], row['tmin'], row['apcp'],
                            row['rhum.2m'], row['wnd']))
        epic_out.close()
    else:
        logging.info('File exists: ' + out_fl)
Example #34
    def maximize(self):
        print 'mini-batch gd: examples = {}, batch size = {}'.format(len(self.train), self.batch_size)

        # these are for multithreading
        q_in = Queue()
        q_out = Queue()

        def worker():
            while True:
                ex = q_in.get()
                q_out.put(self.objective.gradient(self.params, ex))
                q_in.task_done()

        # launch workers
        for i in range(self.num_threads):
            t = threading.Thread(target=worker)
            t.daemon = True
            t.start()

        # no. of mini-batch steps taken
        self.steps = 0
        while True:
            # form fresh batches
            train_copy = list(self.train)
            random.shuffle(train_copy)
            batches = list(util.chunks(train_copy, self.batch_size))

            for batch in batches:
                grad = SparseVector()

                if self.num_threads == 1:
                    for ex in batch:
                        grad_ex = self.objective.gradient(self.params, ex)
                        grad += grad_ex
                else:
                    # WARNING: this is only safe if examples in the batch are mutually exclusive
                    for ex in batch:
                        q_in.put(ex)
                    q_in.join()
                    while not q_out.empty():
                        grad += q_out.get()

                for frozen in self.freeze_params:
                    grad.remove(frozen)

                # normalize by batch size
                grad *= 1.0 / len(batch)

                # add regularization gradient
                if self.l1_reg != 0.0 or self.l2_reg != 0.0:
                    reg_grad = self.reg_gradient(self.params, grad, self.approx_reg)
                    grad += reg_grad

                # record gradient norm, before gradient gets modified by various algorithms
                self.gnorm = grad.norm2()

                delta = grad

                # check if an AdaGrad controller is being used
                adagrad = next((controller for controller in self.controllers if isinstance(controller, AdaGrad)), None)
                if adagrad is None:
                    delta *= self.step_size
                    self.delta = delta
                else:
                    # this controller will modify self.delta
                    self.delta = delta
                    adagrad.control(self)

                # these controllers will modify self.delta, and maybe also self.halt
                for controller in self.controllers:
                    if isinstance(controller, AdaGrad):
                        continue
                    controller.control(self)

                # update params
                self.params += self.delta

                # check if a unit-normalization controller is being used
                unit_norm = next((controller for controller in self.controllers if isinstance(controller, UnitNorm)), None)
                if unit_norm is not None:
                    unit_norm.control(self)

                self.track()

                self.steps += 1

                if self.halt:
                    return self.params
Example #35
def plot_importances(article_sents,
                     importances,
                     abstracts_text,
                     save_location=None,
                     save_name=None):
    plt.ioff()
    sents_per_figure = 40
    max_importance = np.max(importances)
    chunked_sents = util.chunks(article_sents, sents_per_figure)
    chunked_importances = util.chunks(importances, sents_per_figure)

    for chunk_idx in range(len(chunked_sents)):
        my_article_sents = chunked_sents[chunk_idx]
        my_importances = chunked_importances[chunk_idx]

        if len(my_article_sents) < sents_per_figure:
            my_article_sents += [''
                                 ] * (sents_per_figure - len(my_article_sents))
            my_importances = np.concatenate([
                my_importances,
                np.zeros([sents_per_figure - len(my_importances)])
            ])

        y_pos = np.arange(len(my_article_sents))
        fig, ax1 = plt.subplots()
        fig.subplots_adjust(left=0.9, top=1.0, bottom=0.03, right=1.0)
        ax1.barh(y_pos,
                 my_importances,
                 align='center',
                 color='green',
                 ecolor='black')
        ax1.set_yticks(y_pos)
        ax1.set_yticklabels(my_article_sents)
        ax1.invert_yaxis()  # labels read top-to-bottom
        ax1.set_xlabel('Performance')
        ax1.set_title('How fast do you want to go today?')
        ax1.set_xlim(right=max_importance)

        fig.set_size_inches(18.5, 10.5)
        plt.savefig(
            os.path.join(save_location,
                         save_name + '_' + str(chunk_idx) + '.jpg'))
        plt.close(fig)

    plt.figure()
    fig_txt = tw.fill(tw.dedent(abstracts_text), width=80)
    plt.figtext(0.5,
                0.5,
                fig_txt,
                horizontalalignment='center',
                fontsize=9,
                multialignment='left',
                bbox=dict(boxstyle="round",
                          facecolor='#D8D8D8',
                          ec="0.5",
                          pad=0.5,
                          alpha=1),
                fontweight='bold')
    fig = plt.gcf()
    fig.set_size_inches(18.5, 10.5)
    plt.savefig(
        os.path.join(save_location,
                     save_name + '_' + str(chunk_idx + 1) + '.jpg'))
    plt.close(fig)
Example #36
        resvar = np.asarray([np.linalg.norm(r)**2 for r in R])
        losses.append(np.sum(resvar))
        D2 = np.diag(1 / resvar)
        precision2 = D2 @ (np.identity(n) - B)

        err = (precision2 - precision)
        loss2 = np.trace(err @ err.T)
        B = B - lr * G
        print(loss2)

    test_points = 10
    losses = np.asarray(losses)[:test_points]
    target_losses = [
        118., 41.150800000000004, 33.539355199999996, 29.747442032320002,
        27.450672271574934, 25.95846376879459, 24.917943341139274,
        24.139761502111114, 23.519544126307142, 22.998235729589265
    ]

    u.check_equal(losses[:test_points], target_losses[:test_points])
    print('mismatch is ', np.max(losses - target_losses))


if __name__ == '__main__':
    numbers = [(x + 1)**3 for x in range(16)]
    list(u.chunks(numbers, 4))
    X = np.array(list(u.chunks(numbers, 4)))

    X = np.asarray([[5, 1, 0, 4], [0, 4, 1, 2], [1, 0, 3, 3], [4, 2, 0, 4]])
    test_numpy(X)
Example #37
lastTimeStamp = None

lnameDict = lname(s.LDBPATH)
connections = {}
whoCache = {}
hostnames = []
hostnameToCluster = {}
for cluster in s.MACHINES['clusters']:
    if cluster not in whoCache:
        whoCache[cluster] = OrderedDict()
    for hostname in s.MACHINES['clusters'][cluster]['hostnames']:
        hostnames.append(hostname)
        hostnameToCluster[hostname] = cluster

hostnamesChunked = list(util.chunks(hostnames, len(hostnames)//s.THREADS))
threads = []
clients = []
thread_times = []

def sshAndGetWho(client, hostname):
    #s.log('sshing into %s', hostname)
    who = []
    try:
        client.connect(
            hostname,
            username=s.USERNAME,
            password=s.PASSWORD,
        )
        stdin, stdout, stderr = client.exec_command('w')
        # get rid of first two lines of w output
Example #38
def enqueue_jobs(cls,
         method,
         ids_q_or_list,
         queue_number,
         use_rq=True,
         append=False,
         chunk_size=25,
         shortcut_fn=None
    ):
    """
    Takes sqlalchemy query with IDs, runs fn on those repos.
    """

    shortcut_data = None
    if use_rq:
        if shortcut_fn:
            raise ValueError("you can't use RQ with a shortcut_fn")

    else:
        if shortcut_fn:
            shortcut_data_start = time()
            logger.info(u"Getting shortcut data...")
            shortcut_data = shortcut_fn()
            logger.info(u"Got shortcut data in {} seconds".format(
                elapsed(shortcut_data_start)
            ))

    chunk_size = int(chunk_size)


    start_time = time()
    new_loop_start_time = time()
    index = 0

    try:
        logger.info(u"running this query: \n{}\n".format(
            ids_q_or_list.statement.compile(dialect=postgresql.dialect())))
        row_list = ids_q_or_list.all()

    except AttributeError:
        logger.info(u"running this query: \n{}\n".format(ids_q_or_list))
        row_list = db.engine.execute(sql.text(ids_q_or_list)).fetchall()

    if row_list is None:
        logger.info(u"no IDs, all done.")
        return None

    logger.info(u"finished enqueue_jobs query in {} seconds".format(elapsed(start_time)))
    object_ids = [row[0] for row in row_list]

    # do this as late as possible so things can keep using queue
    if use_rq:
        if append:
            logger.info(u"not clearing queue.  queue currently has {} jobs".format(ti_queues[queue_number].count))
        else:
            empty_queue(queue_number)


    num_items = len(object_ids)
    logger.info(u"adding {} items to queue...".format(num_items))

    # iterate through chunks of IDs like [[id1, id2], [id3, id4], ...  ]
    object_ids_chunk = []

    for object_ids_chunk in chunks(object_ids, chunk_size):

        update_fn_args = [cls, method, object_ids_chunk]

        if use_rq:
            job = ti_queues[queue_number].enqueue_call(
                func=update_fn,
                args=update_fn_args,
                timeout=60 * 10,
                result_ttl=0  # number of seconds
            )
            job.meta["object_ids_chunk"] = object_ids_chunk
            job.save()
            # logger.info(u"saved job {}".format(job))
        else:
            update_fn_args.append(shortcut_data)
            update_fn(*update_fn_args, index=index)

        if True: # index % 10 == 0 and index != 0:
            num_jobs_remaining = num_items - (index * chunk_size)
            try:
                jobs_per_hour_this_chunk = chunk_size / float(elapsed(new_loop_start_time) / 3600)
                predicted_mins_to_finish = round(
                    (num_jobs_remaining / float(jobs_per_hour_this_chunk)) * 60,
                    1
                )
                logger.info(u"\n\nWe're doing {} jobs per hour. At this rate, done in {}min".format(
                    int(jobs_per_hour_this_chunk),
                    predicted_mins_to_finish
                ))
                logger.info(u"(finished chunk {} of {} chunks in {} seconds total, {} seconds this loop)\n".format(
                    index,
                    num_items/chunk_size,
                    elapsed(start_time),
                    elapsed(new_loop_start_time)
                ))
            except ZeroDivisionError:
                # logger.info(u"not printing status because divide by zero")
                logger.info(u"."),


            new_loop_start_time = time()
        index += 1
    logger.info(u"last chunk of ids: {}".format(list(object_ids_chunk)))

    db.session.remove()  # close connection nicely
    return True
Example #39
def correct_raw_data(raw_data_path,
                     channel,
                     subsample_factor=2,
                     log_s3_path=None,
                     background_correction=True):

    total_n_jobs = cpu_count()
    # overwrite existing raw data with corrected data
    outdir = raw_data_path

    # get list of all tiles to correct for a given channel
    all_files = np.sort(glob.glob(f'{raw_data_path}/*/*.tiff'))
    if background_correction:
        background_val = get_background_value(raw_data_path)
    total_files = len(all_files)

    bias_path = f'{outdir}/CHN0{channel}_bias.tiff'
    if os.path.exists(bias_path):
        bias = tf.imread(bias_path)

    else:
        # subsample tiles
        files_cb = all_files[::subsample_factor]
        num_files = len(files_cb)

        # compute running sums in parallel
        sums = Parallel(total_n_jobs, verbose=10)(
            delayed(sum_tiles)(f)
            for f in chunks(files_cb,
                            math.ceil(num_files // (total_n_jobs)) + 1))
        sums = [i[:, :, None] for i in sums]
        mean_tile = np.squeeze(np.sum(np.concatenate(sums, axis=2),
                                      axis=2)) / num_files
        if background_correction:
            # subtract background out from bias correction
            mean_tile -= background_val
        mean_tile = sitk.GetImageFromArray(mean_tile)

        # get the bias correction tile using N4ITK
        bias = sitk.GetArrayFromImage(get_bias_field(mean_tile, scale=1.0))

        # save bias tile to local directory
        tf.imsave(bias_path, bias.astype('float32'))

    # save bias tile to S3
    if log_s3_path:
        s3 = boto3.resource('s3')
        img = Image.fromarray(bias)
        fp = BytesIO()
        img.save(fp, format='TIFF')
        # reset pointer to beginning  of file
        fp.seek(0)
        log_s3_url = S3Url(log_s3_path.strip('/'))
        bias_path = f'{log_s3_url.key}/CHN0{channel}_bias.tiff'
        s3.Object(log_s3_url.bucket, bias_path).upload_fileobj(fp)

    # correct all the files and save them
    files_per_proc = math.ceil(total_files / total_n_jobs) + 1
    work = chunks(all_files, files_per_proc)
    with tqdm_joblib(tqdm(desc="Correcting tiles",
                          total=total_n_jobs)) as progress_bar:
        Parallel(n_jobs=total_n_jobs, verbose=10)(
            delayed(correct_tiles)(files, outdir, bias, background_val)
            for files in work)
Example #40
 def queue_work(self, work, miner=None):
     target = ''.join(
         list(chunks('%064x' % self.server_difficulty, 2))[::-1])
     self.switch.queue_work(self, work.block_header, target, work.job_id,
                            work.extranonce2, miner)
Example #41
 def subwindow_shape(self):
     return tuple((b-a for a, b in util.chunks(self.subwindow, 2)))
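
Here chunks is used with a size of 2 to pair up a flat subwindow specification. Assuming subwindow stores (start, stop) values per axis, a small illustration of what the generator expression computes:

subwindow = (10, 50, 20, 80)                        # hypothetical (start, stop) per axis
shape = tuple(b - a for a, b in chunks(subwindow, 2))
# shape == (40, 60): the extent along each axis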
Example #42
    def build_lines(self):

        self._lines = chunks(self._text, self.draw_width)
        self.scroll["maxCurrentLine"] = len(self._lines)
Example #43
def enqueue_jobs(cls,
                 method,
                 ids_q_or_list,
                 queue_number,
                 use_rq=True,
                 chunk_size=25,
                 shortcut_fn=None):
    """
    Takes sqlalchemy query with IDs, runs fn on those repos.
    """

    shortcut_data = None
    if use_rq:
        empty_queue(queue_number)
        if shortcut_fn:
            raise ValueError("you can't use RQ with a shortcut_fn")

    else:
        if shortcut_fn:
            shortcut_data_start = time()
            print "Getting shortcut data..."
            shortcut_data = shortcut_fn()
            print "Got shortcut data in {}sec".format(
                elapsed(shortcut_data_start))

    chunk_size = int(chunk_size)

    start_time = time()
    new_loop_start_time = time()
    index = 0

    print "running this query: \n{}\n".format(
        ids_q_or_list.statement.compile(dialect=postgresql.dialect()))
    row_list = ids_q_or_list.all()
    print "finished query in {}sec".format(elapsed(start_time))
    if row_list is None:
        print "no IDs, all done."
        return None

    object_ids = [row[0] for row in row_list]

    num_jobs = len(object_ids)
    print "adding {} jobs to queue...".format(num_jobs)

    # iterate through chunks of IDs like [[id1, id2], [id3, id4], ...  ]
    object_ids_chunk = []

    for object_ids_chunk in chunks(object_ids, chunk_size):

        update_fn_args = [cls, method, object_ids_chunk]

        if use_rq:
            job = ti_queues[queue_number].enqueue_call(
                func=update_fn,
                args=update_fn_args,
                timeout=60 * 10,
                result_ttl=0  # number of seconds
            )
            job.meta["object_ids_chunk"] = object_ids_chunk
            job.save()
            # print u"saved job {}".format(job)
        else:
            update_fn_args.append(shortcut_data)
            update_fn(*update_fn_args, index=index)

        if True:  # index % 10 == 0 and index != 0:
            num_jobs_remaining = num_jobs - (index * chunk_size)
            try:
                jobs_per_hour_this_chunk = chunk_size / float(
                    elapsed(new_loop_start_time) / 3600)
                predicted_mins_to_finish = round(
                    (num_jobs_remaining / float(jobs_per_hour_this_chunk)) *
                    60, 1)
                print "\n\nWe're doing {} jobs per hour. At this rate, done in {}min".format(
                    int(jobs_per_hour_this_chunk), predicted_mins_to_finish)
                print "(finished chunk {} of {} chunks in {}sec total, {}sec this loop)\n".format(
                    index, num_jobs / chunk_size, elapsed(start_time),
                    elapsed(new_loop_start_time))
            except ZeroDivisionError:
                # print u"not printing status because divide by zero"
                print ".",

            new_loop_start_time = time()
        index += 1
    print "last chunk of ids: {}".format(list(object_ids_chunk))

    db.session.remove()  # close connection nicely
    return True
Example #44
def enqueue_jobs(cls,
                 method,
                 ids_q_or_list,
                 queue_number,
                 use_rq="rq",
                 chunk_size=10,
                 shortcut_fn=None):
    """
    Takes sqlalchemy query with (login, repo_name) IDs, runs fn on those repos.
    """

    shortcut_data = None
    if use_rq == "rq":
        empty_queue(queue_number)
        if shortcut_fn:
            raise ValueError("you can't use RQ with a shortcut_fn")

    else:
        if shortcut_fn:
            shortcut_data_start = time()
            print "Getting shortcut data..."
            shortcut_data = shortcut_fn()
            print "Got shortcut data in {}sec".format(
                elapsed(shortcut_data_start))

    chunk_size = int(chunk_size)

    start_time = time()
    new_loop_start_time = time()
    index = 0

    print "running this query: \n{}\n".format(
        ids_q_or_list.statement.compile(dialect=postgresql.dialect()))
    row_list = ids_q_or_list.all()
    print "finished query in {}sec".format(elapsed(start_time))
    if row_list is None:
        print "no IDs, all done."
        return None

    object_ids = [row[0] for row in row_list]

    num_jobs = len(object_ids)
    print "adding {} jobs to queue...".format(num_jobs)

    # iterate through chunks of IDs like [[id1, id2], [id3, id4], ...  ]
    object_ids_chunk = []

    for object_ids_chunk in chunks(object_ids, chunk_size):

        update_fn_args = [cls, method, object_ids_chunk]

        if use_rq == "rq":
            job = ti_queues[queue_number].enqueue_call(
                func=update_fn,
                args=update_fn_args,
                timeout=60 * 10,
                result_ttl=0  # number of seconds
            )
            job.meta["object_ids_chunk"] = object_ids_chunk
            job.save()
        else:
            update_fn_args.append(shortcut_data)
            update_fn(*update_fn_args)

        if index % 1000 == 0 and index != 0:
            print "added {} jobs to queue in {}sec total, {}sec this loop".format(
                index, elapsed(start_time), elapsed(new_loop_start_time))

            new_loop_start_time = time()
        index += 1
    print "last object added to the queue was {}".format(
        list(object_ids_chunk))

    db.session.remove()  # close connection nicely
    return True
Example #45
def iterstories(stories, include_tasks=False):
    for s in stories:
        yield s
        if include_tasks:
            for t in chunks(s.tasks, 2):
                yield PivotalTaskPair(t)