Example #1
 def get_context_data(self, **kwargs):
     context = super(HomePageView, self).get_context_data(**kwargs)
     context['courses_slides'] = chunked(context['homepage'].promoted_courses.all(), 3)
     context['menthors_slides'] = chunked(context['homepage'].promoted_menthors.all(), 3)
     context['promoted_portfolios'] = Portfolio.objects.filter(
         home_published=True, status='published').order_by('-timestamp')[:8]
     return context
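All of the examples on this page use more_itertools.chunked, which breaks an iterable into lists of at most n items (only the last list may be shorter). A minimal sketch of that behaviour, assuming more_itertools is installed:

from more_itertools import chunked

# chunked is lazy: it yields lists of size 3, with a shorter final list
print(list(chunked(range(7), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]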
Example #2
 def group_by_magnitude(collection):
     alen = len(collection)
     if alen > 1000:
         return chunked(collection, 100)
     if alen > 100:
         return chunked(collection, 10)
     return [collection]
Example #3
def _iter_cores(cores, ncontainer):
    full_cores, part_cores = cores.get('full', []), cores.get('part', [])
    if not (full_cores or part_cores):
        return (([], []) for _ in range(ncontainer))

    return izip_longest(
        chunked(full_cores, len(full_cores)/ncontainer),
        chunked(part_cores, len(part_cores)/ncontainer),
        fillvalue=[]
    )
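Example #3 pairs chunked with izip_longest to spread two core lists over a fixed number of containers. A Python 3 sketch of the same idea with made-up data (zip_longest stands in for the Python 2 izip_longest):

from itertools import zip_longest
from more_itertools import chunked

full_cores, part_cores = [0, 1, 2, 3], [4, 5]
ncontainer = 2
pairs = zip_longest(
    chunked(full_cores, len(full_cores) // ncontainer),
    chunked(part_cores, len(part_cores) // ncontainer),
    fillvalue=[],
)
print(list(pairs))  # [([0, 1], [4]), ([2, 3], [5])]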
Example #4
def chunked_join(iterable, int1, int2, str1, str2, func):
    """Chunk and join."""
    chunks = list(chunked(iterable, int1))
    logging.debug(chunks)
    groups = [list(chunked(chunk, int2)) for chunk in chunks]
    logging.debug(groups)
    return str1.join([
        str2.join([func(''.join(chunk)) for chunk in chunks])
        for chunks in groups
    ])
Example #5
    def parse_obj(self,obj,dtype):
        dic = OD((('type','Feature'),('geometry',OD()),('properties',OD())))
        dic['properties']['class'] = dtype
        for child in obj:
            ctag = self.clip_tag(child.tag)
            if ctag in ['pos','area','loc']:
                if ctag == 'area':
                    dic['geometry']['type'] = 'Polygon'
                    dic['geometry']['coordinates'] = self.get_polygon_coord(child)
                else:
                    if ctag == 'pos':
                        dic['geometry']['type'] = 'Point'
                    elif ctag == 'loc':
                        dic['geometry']['type'] = 'LineString'
                    i = ""
                    for l in child.itertext():
                        i += l
                    l = list(chunked(i.strip().split(),2))
                    i = [[float(xy[1]),float(xy[0])] for xy in l]

                    if len(i) == 1:
                        dic['geometry']['coordinates'] = i[0]
                    else:
                        dic['geometry']['coordinates'] = i
            elif not child.text.strip() == '':
                dic['properties'][ctag]=child.text
            else:
                i = ''
                for l in child.itertext():
                    i += l
                dic['properties'][ctag]=i.strip()
        dic = self.chk_types(dic)
        return dic
Example #6
    def __init__(self, recs):
        self.argslist = []

        # TODO make these separate nodes
        rec_values = (rec.value for rec in recs)
        for name, value, type_ in chunked(rec_values, 3):
            self.argslist.append((name, value, type_))
Example #7
 def decl(self):
     logging.debug(_('args: %s'), self.args)
     args = self.args.strip().replace('__user ', '').split(',')
     logging.debug(_('args: %s'), args)
     args = [''.join(pair) for pair in chunked(args, 2)]
     return 'long {}({});'.format(
         self.name.strip(), ', '.join(args))
Example #8
def cooccurrence(
    corpus,
    execnet_hub,
    targets,
    context,
    paths_progress_iter,
    output=('o', 'space.h5', 'The output space file.'),
):
    """Build the co-occurrence matrix."""

    if targets.index.nlevels > 1:
        targets.sortlevel(inplace=True)
    if context.index.nlevels > 1:
        context.sortlevel(inplace=True)

    def init(channel):
        channel.send(
            (
                'data',
                pickle.dumps(
                    {
                        'kwargs': {
                            'targets': targets,
                            'context': context,
                        },
                        'instance': corpus,
                        'folder_name': 'cooccurrence',
                    },
                )
            )
        )

    results = execnet_hub.run(
        remote_func=sum_folder,
        iterable=paths_progress_iter,
        init_func=init,
    )

    results = ([r] for r in results if r is not None)
    result = next(results)[0]

    for i, chunk in enumerate(chunked(results, 100)):
        logger.info('Received result chunk #%s.', i)
        chunked_result = [c[0] for c in chunk]

        with Timer() as timed:
            result = pd.concat(
                chunked_result + [result],
                copy=False,
            ).groupby(level=result.index.names).sum()

        logger.info(
            'Computed the result by merging a chunk of received results and the result in %.2f seconds.',
            timed.elapsed,
        )

    result = result.to_frame('count')
    result.reset_index(inplace=True)

    write_space(output, context, targets, result)
Example #9
def add_to_spotify(db, spotify, album, original_artist, original_album):
    album = spotify.album(album["uri"])
    tracks = album["tracks"]
    track_ids = [t["uri"] for t in tracks["items"]]
    while tracks["next"]:
        tracks = spotify.next(tracks)
        track_ids.extend(t["uri"] for t in tracks["items"])

    click.echo("Adding {0} tracks to Spotify...".format(len(track_ids)))
    for chunk in chunked(track_ids, 50):
        response = spotify.current_user_saved_tracks_add(chunk)
        if response is not None:
            click.secho("F**k, something broke:")
            pprint(response)
            click.confirm("Continue?", abort=True)
            return

    cursor = db.cursor()
    cursor.execute(
        """UPDATE collection SET complete = 1
                      WHERE artist = ? AND album = ?""",
        [original_artist, original_album],
    )
    db.commit()
    click.secho("Done ", fg="green", nl=False)
    time.sleep(0.25)
Example #10
def start(experiment_description, agent, environment, results_descriptor):
    """Kick off the execution of an experiment."""
    initialize_results(results_descriptor)
    interval_results = islice(interval_results_generator(agent, environment, experiment_description), experiment_description.num_steps)
    results_interval_chunks = chunked(interval_results, results_descriptor.interval)
    for chunk in results_interval_chunks:
        results = [interval_data.results for interval_data in chunk]
        write_results(merge_results(results), results_descriptor)
Example #11
def parallelize_func(iterable, func, chunksz=1, n_jobs=16, *args, **kwargs):
    """ Parallelize a function over each element of an iterable. """
    chunker = func
    chunks = more_itertools.chunked(iterable, chunksz)
    chunks_results = Parallel(n_jobs=n_jobs, verbose=50)(
        delayed(chunker)(chunk, *args, **kwargs) for chunk in chunks)
    results = more_itertools.flatten(chunks_results)
    return list(results)
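A hypothetical call to the helper above: each chunk is handed to func as a list, so func must accept a list of elements (joblib provides Parallel and delayed):

def square_chunk(chunk):
    return [x * x for x in chunk]

# squares 0..99 in chunks of 10 spread over 4 workers
results = parallelize_func(range(100), square_chunk, chunksz=10, n_jobs=4)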
Example #12
    def get_random_logs(self, limit):
        count = min(limit, self.db.count())
        ids = self.db.find({}, {'_id': 1})
        rand_ids = [r['_id'] for r in random.sample(list(ids), count)]
        for rand_ids_chunk in chunked(rand_ids, 100):
            query = {'_id': {'$in': rand_ids_chunk}}
            for doc in self.db.find(query, {'message': 1}):
                yield doc['message']
Example #13
    def create_partials(self, product, branch, platform, locales, revision,
                        chunk_name=1):
        """Calculates "from" and "to" MAR URLs and calls  create_task_graph().
        Currently "from" MAR is 2 releases behind to avoid duplication of
        existing CI partials.

        :param product: capitalized product name, AKA appName, e.g. Firefox
        :param branch: branch name (mozilla-central)
        :param platform: buildbot platform (linux, macosx64)
        :param locales: list of locales
        :param revision: revision of the "to" build
        :param chunk_name: chunk name
        """
        # TODO: move limit to config
        # Get last 5 releases (including current),
        # generate partial for 4 latest
        last_releases = self.balrog_client.get_releases(product, branch)[:5]
        release_to = last_releases.pop(0)
        per_chunk = 5
        for update_number, release_from in enumerate(last_releases, start=1):
            log.debug("From: %s", release_from)
            log.debug("To: %s", release_to)
            for n, chunk in enumerate(chunked(locales, per_chunk), start=1):
                extra = []
                for locale in chunk:
                    try:
                        build_from = self.balrog_client.get_build(
                            release_from, platform, locale)
                        log.debug("Build from: %s", build_from)
                        build_to = self.balrog_client.get_build(
                            release_to, platform, locale)
                        log.debug("Build to: %s", build_to)
                        from_mar = build_from["completes"][0]["fileUrl"]
                        to_mar = build_to["completes"][0]["fileUrl"]
                        extra.append({
                            "locale": locale,
                            "from_mar": from_mar,
                            "to_mar": to_mar,
                        })
                    except (requests.HTTPError, ValueError):
                        log.exception(
                            "Error getting build, skipping this scenario")

                if extra:
                    if len(locales) > per_chunk:
                        # More than 1 chunk
                        subchunk = n
                    else:
                        subchunk = None

                    all_locales = [e["locale"] for e in extra]
                    log.info("New Funsize task for %s", all_locales)
                    self.submit_task_graph(
                        branch=branch, revision=revision, platform=platform,
                        update_number=update_number, chunk_name=chunk_name,
                        extra=extra, subchunk=subchunk)
                else:
                    log.warn("Nothing to submit")
Example #14
def c_layout(i, definition, template):
    c_name = layer_names[i]
    pretty_name = c_name.strip('_').capitalize()
    layout = d['layout']
    
    surround = lambda s: ''.join(interleave_longest(['│']*(len(s)+1), s))
    layer = list(map(uni, definition))
    layer[41] = layer[41].center(11)
    layer = chunked(layer, 12)
    rows = intersperse(mid, map(surround, layer))
    pretty = '\n'.join(itertools.chain([top], rows, [bottom]))
    
    surround = lambda s: ', '.join(s)
    layer = list(map(lambda k: layer_name.get(k, k), definition))
    layer = chunked(layer, 12)
    rows = map(surround, layer)
    c_layer = ',\n    '.join(itertools.chain([], rows, []))
    
    return template.format(pretty_name, pretty, c_name, layout, c_layer)
Example #15
    def score(self, rev_ids, caches=None, cache=None):
        if isinstance(rev_ids, int):
            rev_ids = [rev_ids]

        batches = batch_rev_caches(chunked(rev_ids, self.batch_size), caches,
                                   cache)

        for batch_scores in self.scores_ex.map(self._score_batch, batches):
            for score in batch_scores:
                yield score
Example #16
def main(args):
    # get the arguments
    method = args.method
    win_size = args.win_size
    step = args.step
    metric_name = args.metric_name
    n_jobs = args.workers

    # Load the data.
    L, H, olddf, newdf = pickle.load(open(args.filename))
    words = pd.Series(olddf.word.values.ravel()).unique()
    oldrows = []
    newrows = []
    sourcexrange = np.arange(args.mint, args.maxt, step)
    destxrange = np.arange(args.mint, args.maxt, step)
    if method == 'win':
        sourcexrange = sourcexrange[win_size:]
        destxrange = destxrange[:-win_size]

    if args.interpolate:
        sourcexinter = np.arange(sourcexrange[0], sourcexrange[-1] + 1, 1)
        destxinter = np.arange(destxrange[0], destxrange[-1] + 1, 1)
    else:
        sourcexinter = sourcexrange
        destxinter = destxrange

    # Construct the series
    assert(len(sourcexinter) == len(destxinter))
    chunk_sz = int(np.ceil(len(words)/float(n_jobs)))  # chunked needs an integer chunk size
    words_chunks = more_itertools.chunked(words, chunk_sz)
    timeseries_chunks = Parallel(n_jobs=n_jobs, verbose=20)(delayed(process_chunk)(chunk, create_word_time_series, olddf, newdf,
                                                                               sourcexinter, destxinter,
                                                                               metric_name=metric_name,
                                                                               interpolate=args.interpolate) for chunk in words_chunks)

    timeseries = list(more_itertools.flatten(timeseries_chunks))

    # Dump the data frame
    for orow, newrow in timeseries:
        if orow and newrow:
            oldrows.append(orow)
            newrows.append(newrow)

    oldtimeseries = pd.DataFrame()
    newtimeseries = pd.DataFrame()
    header = ['word']
    header.extend(sourcexinter)
    newheader = ['word']
    newheader.extend(destxinter)
    oldtimeseries = oldtimeseries.from_records(oldrows, columns=header)
    oldtimeseries = oldtimeseries.fillna(method='backfill', axis=1)
    newtimeseries = newtimeseries.from_records(newrows, columns=newheader)
    newtimeseries = newtimeseries.fillna(method='backfill', axis=1)
    oldtimeseries.to_csv(args.sourcetimef, encoding='utf-8')
    newtimeseries.to_csv(args.endtimef, encoding='utf-8')
Example #17
def write_results(results, results_descriptor):
    """Output the given results to terminal and to file."""
    output_path = results_descriptor.output_path
    keys = results_descriptor.keys
    value_vectors = (results[key] for key in keys)
    rows = chunked(interleave(value_vectors), len(keys))
    string_rows = map(lambda v: ' '.join(str(x) for x in v), rows)
    all_string_rows = '\n'.join(string_row for string_row in string_rows)
    keys_string = ' '.join(key for key in keys)
    output_stdout(keys_string + '\n' + all_string_rows, output_path)
    output_file(all_string_rows, output_path)
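The interleave/chunked combination regroups per-key value vectors into rows of len(keys) items. A standalone illustration with made-up data, using more_itertools.interleave (which takes the iterables as separate arguments, hence the unpacking):

from more_itertools import chunked, interleave

value_vectors = [['a1', 'a2'], [1, 2]]   # two keys, two result rows
rows = chunked(interleave(*value_vectors), 2)
print([list(row) for row in rows])  # [['a1', 1], ['a2', 2]]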
Example #18
 def update_graphs(self):
     """Get data from shared mp array and appends to graph if we are ready to do so"""
     if self.sync_event.is_set():
         if self.plots_are_reset:
             self.arrays_plots = {self.plots[ch]:
                                  chunked([n for n in self.np_array[i] if not np.isnan(n)], 50)
                                  for i, ch in enumerate(self.ch_num)
                                  if not np.isnan(self.np_array[i][0])}
             self.add_point_to_graph()
     else:
         qc.QTimer.singleShot(5, self.update_graphs)
Example #19
    def create(cls, network_id, cidr):
        network = cls(network_id, cidr)
        ip_network = network.cidr

        rds.set(cls._CIDR_KEY % network_id, cidr)
        rds.set(cls._NETWORK_ID_KEY % cidr, network_id)

        key = cls._NETWORK_IPS_KEY % network_id
        for ipnums in more_itertools.chunked(xrange(ip_network.first, ip_network.last+1), 500):
            rds.sadd(key, *ipnums)

        return network
Example #20
 def get_polygon_coord(self,obj):
     #get exterior coords
     coord = []
     i = ""
     ext = obj.find('.//gml:exterior',self.ns)
     for l in ext.itertext():
         i += l
     l = list(chunked(i.strip().split(),2))
     coord.append([[float(xy[1]),float(xy[0])] for xy in l])
     #get interior coords
     inte = obj.findall('.//gml:interior',self.ns)
     if not inte:
         return coord
     else:
         for i in inte:
             j = ""
             for l in i.itertext():
                 j += l
             l = list(chunked(j.strip().split(),2))
             coord.append([[float(xy[1]),float(xy[0])] for xy in l])
     return coord
Example #21
def main(argv=None):
    parser = argparse.ArgumentParser(
        formatter_class=WrappedTextHelpFormatter,
        description=DESCRIPTION.strip(),
    )
    parser.add_argument(
        '--sleep',
        help='how long in seconds to sleep before submitting the next group',
        type=int,
        default=SLEEP_DEFAULT
    )
    parser.add_argument('--host', help='host for system to reprocess in', default=DEFAULT_HOST)
    parser.add_argument('crashid', help='one or more crash ids to fetch data for',
                        nargs='*', action=FallbackToPipeAction)

    if argv is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(argv)

    api_token = os.environ.get('SOCORRO_REPROCESS_API_TOKEN')
    if not api_token:
        print('You need to set SOCORRO_REPROCESS_API_TOKEN in the environment')
        return 1

    url = args.host.rstrip('/') + '/api/Reprocessing/'
    print('Sending reprocessing requests to: %s' % url)
    session = session_with_retries()

    crash_ids = args.crashid
    print('Reprocessing %s crashes sleeping %s seconds between groups...' % (
        len(crash_ids), args.sleep
    ))

    groups = list(chunked(crash_ids, CHUNK_SIZE))
    for i, group in enumerate(groups):
        print('Processing group ending with %s ... (%s/%s)' % (group[-1], i + 1, len(groups)))
        resp = session.post(
            url,
            data={'crash_ids': group},
            headers={
                'Auth-Token': api_token
            }
        )
        if resp.status_code != 200:
            print('Got back non-200 status code: %s %s' % (resp.status_code, resp.content))
            continue

        # NOTE(willkg): We sleep here because the webapp has a bunch of rate limiting and we don't
        # want to trigger that. It'd be nice if we didn't have to do this.
        time.sleep(args.sleep)

    print('Done!')
Example #22
def test_container_release_cores(test_db):
    a = App.get_or_create('app', 'http://git.hunantv.com/group/app.git')
    v = a.add_version(random_sha1())
    p = Pod.create('pod', 'pod', 10, -1)
    host = Host.create(p, random_ipv4(), random_string(), random_uuid(), 200, 0)

    for core in host.cores:
        assert core.host_id == host.id
        assert core.remain == 10

    containers = []
    
    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10), chunked(cores[100:], 10)):
        used_cores = {'full': fcores, 'part': pcores}
        host.occupy_cores(used_cores, 5)
        c = Container.create(random_sha1(), host, v, random_string(), 'entrypoint', used_cores, 'env', nshare=5)
        containers.append(c)

    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10), chunked(cores[100:], 10)):
        for core in fcores:
            assert core.remain == 0
        for core in pcores:
            assert core.remain == 5

    for c in containers:
        c.delete()

    cores = sorted(host.cores, key=operator.attrgetter('label'))
    for fcores, pcores in zip(chunked(cores[:100], 10), chunked(cores[100:], 10)):
        for core in fcores:
            assert core.remain == 10
        for core in pcores:
            assert core.remain == 10
Example #23
def fix_ip(n):
    network = n.network
    base = int(network.network_address)
    for ipnums in more_itertools.chunked(xrange(base+n.gateway_count, base+network.num_addresses), 500):
        rds.sadd(n.storekey, *ipnums)

    rds.sadd(n.gatekey, *range(base, base+n.gateway_count))

    for ip in n.ips.all():
        rds.srem(n.storekey, ip.ipnum)

    for gateway in n.gates.all():
        rds.srem(n.gatekey, gateway.ipnum)
Example #24
    def create_partials(self, product, branch, platform, locales, revision,
                        mar_urls, mar_signing_format):
        """Calculates "from" and "to" MAR URLs and calls create_task_graph().
        Currently "from" MAR is 2 releases behind to avoid duplication of
        existing CI partials.
        :param product: capitalized product name, AKA appName, e.g. Firefox
        :param branch: branch name (mozilla-central)
        :param platform: buildbot/taskcluster platform (linux, macosx64)
        :param locales: list of locales
        :param revision: revision of the "to" build
        :param mar_urls: dictionary of {locale:mar file url} for each locale
        """
        # TODO: move limit to config
        partial_limit = 4
        per_chunk = 5

        tasks = defaultdict(list)

        for locale in locales:
            to_mar = mar_urls.get(locale)
            log.info("Build to: %s", to_mar)
            latest_releases = self.get_builds(
                product, platform, branch, locale, to_mar, partial_limit)
            for update_number, build_from in enumerate(latest_releases, start=1):
                log.info("Build from: %s", build_from)
                try:
                    from_mar = build_from['completes'][0]['fileUrl']
                except ValueError as excp:
                    log.error("Unable to extract fileUrl from %s: %s",
                              build_from, excp)
                    continue

                tasks[update_number].append({
                    "locale": locale,
                    "from_mar": from_mar,
                    "to_mar": to_mar,
                })

        for update_number in tasks:
            for extra in chunked(tasks[update_number], per_chunk):
                all_locales = [e["locale"] for e in extra]
                log.info("New Funsize task for %s", all_locales)
                locale_desc = "_".join(all_locales)
                locale_desc = locale_desc.replace('-', '_')
                self.submit_task_graph(
                    branch=branch, revision=revision, platform=platform,
                    update_number=update_number,
                    extra=extra, locale_desc=locale_desc,
                    mar_signing_format=mar_signing_format)
Example #25
    def _score(self, context, model, rev_ids):
        logging.debug("Starting up thread pool with {0} workers"
                      .format(self.workers))
        with ThreadPoolExecutor(max_workers=self.workers) as executor:
            futures = []
            for rev_id_batch in chunked(rev_ids, self.batch_size):
                rev_id_batch = list(rev_id_batch)
                logging.debug("Starting batch of {0} revids"
                              .format(len(rev_id_batch)))
                futures.append(executor.submit(self._score_request,
                                               context, model,
                                               rev_id_batch))

            for future in futures:
                for score in future.result():
                    yield score
Example #26
 def get(self, *args):
     self.set_header("Content-Type", "text/event-stream")
     north, south, east, west = map(float, args)
     start_t = datetime.now()
     query_range = r.polygon(r.point(west, north), r.point(west, south), r.point(east, south), r.point(east, north))
     selection = r.table("streets").get_intersecting(query_range, index="geometry")
     initial_t = (datetime.now() - start_t).total_seconds()
     cursor = selection.map(r.row["geometry"].to_geojson()).run(self.conn)
     size = 0
     for chunk in chunked(cursor, 2000):
         size += len(chunk)
         self.write_event(chunk)
     self.write_event("done")
     total_t = (datetime.now() - start_t).total_seconds()
     print "street query took", initial_t, "s for the first batch",
     print "(", total_t, "s total) and provided", size, "results."
Example #27
def on_reload() -> None:
    """Renders the HTML pages."""
    env = Environment(
        loader=FileSystemLoader('.'),
        autoescape=select_autoescape(['html', 'xml']),
    )
    template = env.get_template('template.html')
    for page, chunk in enumerate(chunked(books, book_per_page),
                                 1):  # noqa: WPS221
        rendered_page = template.render({
            'chunk': chunk,
            'pages': pages,
            'page': page
        })
        with open(os.path.join('pages', f'index{page}.html'),
                  'w',
                  encoding='utf8') as html_file:  # noqa: WPS221
            html_file.write(rendered_page)
Example #28
    def find_missing(self, num_workers, date):
        check_crashids_for_date = partial(check_crashids, date=date)

        missing = []
        entropy_chunked = chunked(self.get_entropy(), CHUNK_SIZE)
        if num_workers == 1:
            for result in map(check_crashids_for_date, entropy_chunked):
                missing.extend(result)
        else:
            with concurrent.futures.ProcessPoolExecutor(
                max_workers=num_workers
            ) as executor:
                for result in executor.map(
                    check_crashids_for_date, entropy_chunked, timeout=WORKER_TIMEOUT
                ):
                    missing.extend(result)

        return list(missing)
Example #29
def iter_examples(filename, stoi, window_size=5, batch_size=20):
    """
        reads from file and generates batched tensor examples
    """
    # numericalize
    iter_ex = (numericalize_example(e, stoi)
               for e in iter_data(filename, window_size))
    # fill-value to pad contexts with
    fv = stoi[PAD_TOK]
    for example in mit.chunked(iter_ex, batch_size):
        #de-tuple
        words, contexts = zip(*example)
        # pad contexts
        contexts = list(zip(*it.zip_longest(*contexts, fillvalue=fv)))
        # create tensors
        word_tensor = torch.tensor(words)
        context_tensor = torch.tensor(contexts)
        yield word_tensor, context_tensor
Example #30
    def create(cls, name, netspace):
        """create network and store ips(int) under this network in redis"""
        try:
            n = cls(name, netspace)
            db.session.add(n)
            db.session.commit()

            # create sub IPs
            network = n.network
            base = int(network.network_address)
            # write them 500 at a time
            for ipnums in more_itertools.chunked(xrange(base+1, base+network.num_addresses), 500):
                rds.sadd(n.storekey, *ipnums)

            return n
        except sqlalchemy.exc.IntegrityError:
            db.session.rollback()
            return None
Example #31
def load_test(ske, block, id_seperate, batch_size):
    '''
    To load and shuffle the test set.
    '''
    test_block = block[id_seperate == 3]
    test_ske = ske[id_seperate == 3]

    np.random.seed(52)
    state = np.random.get_state()
    np.random.shuffle(test_block)
    np.random.set_state(state)
    np.random.shuffle(test_ske)

    test_ske = test_ske.flatten()
    test_block = test_block.reshape(-1, 3)
    test_ske = list(chunked(test_ske, batch_size))

    return (test_ske, test_block)
Example #32
def apply_parallel(data: List[Any], func: Callable) -> List[Any]:
    """
    Apply function to list of elements.

    Automatically determines the chunk size.
    """
    cpu_cores = cpu_count()

    try:
        chunk_size = ceil(len(data) / cpu_cores)
        pool = Pool(cpu_cores)
        transformed_data = pool.map(func,
                                    chunked(data, chunk_size),
                                    chunksize=1)
    finally:
        pool.close()
        pool.join()
        return transformed_data
Example #33
    def __init__(self,
                 ctx,
                 entries,
                 *,
                 per_page=15,
                 title=discord.Embed.Empty,
                 colour=None,
                 **kwargs):
        super().__init__(ctx, **kwargs)
        self._pages = tuple(chunked(entries, per_page))
        self._index = 0

        if colour is None:
            colour = ctx.bot.colour

        # These should probably be removed at some point in the future.
        self.title = title
        self.colour = colour
Example #34
def make_dataset(video_dir_path, video_phase_annotation_path, phase_list,
                 sample_duration):
    """
    Construct dataset of samples from a given video directory path.

    Each sample is a python dictionary containing the video path and
    indices of 16-frame portions from that video file, as well as the
    associated class label of that portion.

    video_phase_annotation_path file is supposed to contain a frame index and
    corresponding class label (surgical phase) at each of its row.

    :param root_path: Absolute path to the root directory of video and timestamp files.
    :param phase_list: List of all possible phases (classes)
    :param subset: training, validation, or testing.
    :param idx_subset: list of exact video file indices for the chosen subset.
    :param sample_duration: number of frames each sample contains
    :return: list of samples.
    """

    class_to_idx = {phase_list[i]: i for i in range(len(phase_list))}

    dataset = []

    df = pd.read_csv(video_phase_annotation_path, delim_whitespace=True)

    sample = {
        'video': video_dir_path,
        'video_id': os.path.basename(video_dir_path),
    }

    for phase in phase_list:

        df_phase = df.loc[df['Phase'] == phase]

        for group in consecutive_groups(df_phase['Frame'] + 1):

            for chunk in chunked(group, sample_duration):
                sample_j = copy.deepcopy(sample)
                sample_j['frame_indices'] = chunk
                sample_j['label'] = class_to_idx[phase]
                dataset.append(sample_j)

    return dataset
Example #35
def make_source_data(session: sa_orm.Session, metrics: SourceDataMetrics,
                     available_schemas: list):
    """
    Creates source data based on metrics and available schemas

    :param session: SQLAlchemy session
    :param metrics: determines how many instances of each model to create
    :param available_schemas: list of dictionaries describing form schemas
    """

    # create all the forms from the available schemas
    schema_iterator = factory.Iterator(available_schemas, cycle=True)
    forms = FormFactory.build_batch(metrics.forms, schema=schema_iterator)
    session.add_all(forms)
    session.flush()

    # create all the users
    users = UserFactory.build_batch(metrics.users)
    session.add_all(users)
    session.flush()

    # use the node path map cache to generate submission responses JSON
    get_node_path_map = transformers.get_node_path_map_cache(session)
    _cached_make_response = functools.partial(make_response, get_node_path_map)

    # Create all the submissions
    #
    # To support building a very large number of submissions, we avoid using factory_boy.Factory.build_batch()
    # because it constructs all instances at once in memory as a list
    #
    # Instead, we use a generator and insert 500 instances at a time

    submission_factory = functools.partial(
        SubmissionFactory,
        f_make_response=_cached_make_response,
        form=factory.Iterator(forms, cycle=True),
        user=factory.Iterator(users, cycle=True),
    )
    submissions_generator = (submission_factory()
                             for _ in range(metrics.submissions))
    for chunk in more_itertools.chunked(submissions_generator, 500):
        session.bulk_save_objects(chunk)

    session.flush()
Example #36
def generate_embeddings_iter(
    model,
    file_path: pathlib.Path,
    batch_size: int,
    device: torch.device,
    seen_set: Set[int],
    min_confidence: float = 0.5,
):
    with h5py.File(str(file_path), "r") as f:
        image_dset = f["image"]
        confidence_dset = f["confidence"]
        external_id_dset = f["external_id"]

        for slicing in chunked(range(len(image_dset)), batch_size):
            slicing = np.array(slicing)
            external_ids = external_id_dset[slicing]
            mask = external_ids == 0

            if np.all(mask):
                break

            mask = (~mask) & (confidence_dset[slicing] >= min_confidence)

            for i, external_id in enumerate(external_ids):
                if int(external_id) in seen_set:
                    mask[i] = 0

            if np.all(~mask):
                continue

            images = image_dset[slicing][mask]
            images = np.moveaxis(images, -1, 1)  # move channel dim to 1st dim

            with torch.no_grad():
                torch_images = torch.tensor(images,
                                            dtype=torch.float32,
                                            device=device)
                embeddings = model.extract_features(torch_images).cpu().numpy()

            max_embeddings = np.max(embeddings, (-1, -2))
            yield (
                max_embeddings,
                external_ids[mask],
            )
Example #37
def display_duplicates(duplicates, db, trash="./Trash/"):
    from werkzeug.routing import PathConverter
    import io

    class EverythingConverter(PathConverter):
        regex = '.*?'

    app = Flask(__name__)
    CORS(app)
    app.url_map.converters['everything'] = EverythingConverter

    def render(duplicates, current, total):
        env = Environment(loader=FileSystemLoader('template'))
        template = env.get_template('index.html')
        return template.render(duplicates=duplicates,
                               current=current,
                               total=total)

    with TemporaryDirectory() as folder:
        # Generate all of the HTML files
        chunk_size = 25
        for i, dups in enumerate(chunked(duplicates, chunk_size)):
            with open('{}/{}.html'.format(folder, i), 'w') as f:
                f.write(render(dups,
                               current=i,
                               total=math.ceil(len(duplicates) / chunk_size)))

        webbrowser.open("file://{}/{}".format(folder, '0.html'))

        @app.route('/picture/<everything:file_name>', methods=['DELETE'])
        def delete_picture_(file_name, trash=trash):
            return str(delete_picture(file_name, db, trash))

        @app.route('/heic-transform/<everything:file_name>', methods=['GET'])
        def transcode_heic_(file_name):
            heif_image = pyheif.read_heif(open(file_name, 'rb'))

            image = Image.frombytes(
                mode=heif_image.mode, size=heif_image.size, data=heif_image.data)
            encoded = io.BytesIO()
            image.save(encoded, format='JPEG')
            return Response(encoded.getvalue(), mimetype='image/jpeg')

        app.run()
Example #38
def upload_s3(s_bucket, s_key):
    clnt = boto3.client('s3', region_name=DEFAULT_REGION)
    doc_serv = boto3.client('cloudsearchdomain',
                            region_name=DEFAULT_REGION,
                            endpoint_url=ENDPOINT_URL)
    response = clnt.get_object(Bucket=s_bucket, Key=s_key)
    raw_data_gz = response.get('Body').read()
    raw_data = zlib.decompress(raw_data_gz, 16 + zlib.MAX_WBITS)
    for raw_data_line in raw_data.splitlines():
        json_data = json.loads(raw_data_line.decode('utf-8'))
        for big_chunk in more_itertools.chunked(json_data['Records'],
                                                CHUNK_SIZE):
            for json_event in big_chunk:
                doc = {}
                doc_id = json_event['eventID']

                def search(obj, pattern):
                    cur_obj = obj
                    for item in pattern.split('.'):
                        if not isinstance(cur_obj, dict):
                            return None
                        cur_obj = cur_obj.get(item, None)
                    return cur_obj

                for cs_name, ct_name in MAPPING.items():
                    val = search(json_event, ct_name)
                    if val != None:
                        application.logger.debug(
                            "docId[%s] Adding field CloudSearch ID: %s = %s",
                            doc_id, cs_name, val)
                        doc[cs_name] = val
                doc['raw'] = json.dumps(json_event)
                inbytes = dict_to_binary(doc, doc_id)
                try:
                    response = doc_serv.upload_documents(
                        contentType='application/json', documents=inbytes)
                except Exception as exception:
                    e = exception
                    string = json.dumps(doc)
                    string = '[{{\"type\":\"add\", \"id\":\"{t}\",\"fields\":{s}}}]'.format(
                        doc_id, string)
                    application.logger.error(str(e.args) + "JSON: " + string)
                    application.logger.debug('Inserting docId: %s', doc_id)
                application.logger.info('CloudSearch commit ok')
Example #39
def sample_model(model, n , batch_size=256, smiles_column='branch_smiles'):
    n_loops = int(np.ceil(n / batch_size))
    smiles_list, mofs, props = [], [], []
    for chunk in tqdm(chunked(range(n), batch_size), total=n_loops, desc='Samples'):
        z = model.sample_z_prior(len(chunk))
        outs = model.z_to_outputs(z)
        smiles_list.extend(outs['x'])
        mofs.extend(outs['mof'])
        props.extend(outs['y'])

    props = np.stack(props)
    gen_df = pd.DataFrame(smiles_list, columns=[smiles_column])
    gen_df['valid'] = gen_df[smiles_column].apply(valid_smiles)

    for index,label in enumerate(model.vocab_mof.categories):
        gen_df[label] = [m[index] for m in mofs]
    for index, label in enumerate(model.vocab_y.labels):
        gen_df[label] = props[:,index]
    return gen_df
Example #40
  def parseOcrFile(self, filePath):
    """parseOcrFile takes an argument that specifies file's name and path. It opens that file and parses out numerals algorithmically using OcrNumeralParser.parseOcrLines().

    returns a list of OcrNumeral Lists, one sublist per OCR account number."""
    conditionedFileList = []
    f = open(filePath, 'r')
    ocrNumeralsListofLists = []

    #First strip newlines from each line of file
    for line in f:
      conditionedFileList.extend([line.rstrip("\n")])

    ocrSeqLists = list(chunked(conditionedFileList, 4))

    for ocrSeq in ocrSeqLists:
      resultList = self.parseOcrLines(ocrSeq)
      ocrNumeralsListofLists.extend([resultList])
    
    return ocrNumeralsListofLists
Example #41
def get_aws_batch_job_infos(all_job_ids, boto_config=None, missing_ok=False):
    if boto_config is None:
        boto_config = BOTO_CONFIG
    # ensure that the list of job ids is unique
    assert len(all_job_ids) == len(set(all_job_ids))
    batch_client = boto3.client(service_name="batch", config=boto_config)
    returned_jobs = []
    for batch_job_ids in more_itertools.chunked(all_job_ids, 50):
        batch_returned_jobs = _get_aws_batch_job_infos_for_batch(
            batch_job_ids, batch_client, missing_ok=missing_ok
        )
        returned_jobs.extend(batch_returned_jobs)

    returned_ids = [job["jobId"] for job in returned_jobs]
    if not missing_ok:
        assert sorted(returned_ids) == sorted(all_job_ids), str(set(returned_ids) - set(all_job_ids)) + str(
            set(all_job_ids) - set(returned_ids)
        )
    return returned_jobs
Example #42
async def load_data(override=False):
    redis = await aioredis.create_redis_pool('redis://localhost', password="******")
    await redis.flushall()
    values = await redis.scard('dict:all')

    if values < 1000000 or override:
        all_english_words = load_words(
            path.join(getcwd(), "./scraper/utils/data/english_dictionary/wlist_match2.txt")).union(
            load_words(path.join(getcwd(), "./scraper/utils/data/english_dictionary/wlist_match1.txt")).union(
                load_words(path.join(getcwd(), "./scraper/utils/data/english_dictionary/personal_whitelist.txt"))))

        chunks = list(chunked(all_english_words, 10000))
        for chunk in chunks:
            await redis.sadd('dict:all', *chunk)
        new_len = await redis.scard('dict:all')
        print(f'Database seeded with {new_len} values')
    else:
        print(f'data already loaded with {values} values')
        return
Example #43
def batch_multiprocess_with_return(
    function_list, pool_results=None, n_cores=mp.cpu_count(), show_progress=True,
    tqdm_desc=None):
    """
    Run a list of functions on `n_cores` (default: all CPU cores),
    with the option to show a progress bar using tqdm (default: shown).
    """
    iterator = [*chunked(function_list, n_cores)]
    pool_results = pool_results if pool_results else []
    pool = Pool(processes=n_cores)
    if show_progress:
        iterator = tqdm(iterator, desc=tqdm_desc)
    for func_batch in iterator:
        procs = []
        for f in func_batch:
            pool.apply_async(func=f, callback=pool_results.append)
    pool.close()
    pool.join()
    return pool_results
Example #44
def main() -> None:
    args = parse_arguments()

    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    log.info(args)
    tf_counter: TCounter[str] = Counter()
    df_counter: TCounter[str] = Counter()
    with mp.Pool(args.workers, initializer=init,
                 initargs=(args.arch, )) as pool:
        results = pool.imap(analyze, chunked(tqdmf(args.tsv), args.batch_size))
        for tf, df in results:
            tf_counter += tf
            df_counter += df

    vocab = sorted(tf_counter.keys(),
                   key=lambda x: tf_counter[x],
                   reverse=True)
    for word in vocab:
        print(f"{word}\t{tf_counter[word]}\t{df_counter[word]}")
Example #45
def main():
    env = Environment(loader=FileSystemLoader('.'),
                      autoescape=select_autoescape(['html']))

    with open('static/data.json', 'r', encoding='utf8') as f:
        books_json = f.read()

    books = json.loads(books_json)
    parts = list(chunked(books, 10))
    quantity_pages = len(parts)
    os.makedirs('pages', exist_ok=True)
    template = env.get_template('template.html')

    pages = writer_pages(parts, template, quantity_pages)

    delete_unnecessary_files(pages)

    server = HTTPServer(('0.0.0.0', 8000), SimpleHTTPRequestHandler)
    server.serve_forever()
Example #46
def fisher(hits, cluster_sizes, test_args, cores):
    raise NotImplementedError(fisher_exact_package_error_msg)
    slices = [
        slice(l[0], l[-1] + 1)
        for l in more_itertools.chunked(np.arange(hits.shape[1]), cores)
    ]
    # print("Starting fisher test")
    t1 = time()
    pvalues_partial_dfs = Parallel(cores)(
        delayed(_run_fisher_exact_test_in_parallel_loop)(
            df=hits.iloc[:, curr_slice],
            cluster_sizes=cluster_sizes,
            test_args=test_args,
        )
        for curr_slice in slices
    )
    # print("Took ", (time() - t1) / 60, " min")
    pvalues = pd.concat(pvalues_partial_dfs, axis=0).sort_index()
    return pvalues
Example #47
def apply_parallel(func: Callable,
                   data: List[Any],
                   cpu_cores: int = None) -> List[Any]:
    if not cpu_cores:
        cpu_cores = cpu_count()
    try:
        chunk_size = ceil(len(data) / cpu_cores)
        print(chunk_size)
        # pool = Pool(cpu_cores)
        # print("pool")
        chunks = chunked(data, chunk_size)
        # print("was chunked")
        transformed_data = map(func, data)  #chunks, chunksize=1)
        print(type(transformed_data))
        print("data was transformed")
    finally:
        # pool.close()
        # pool.join()
        return list(transformed_data)
Example #48
def check_if_spelled_right_test():
    seed_redis()
    if redis_client.scard('mispelledwords') < 35000:
        wrong_words = load_words(
            path.join(
                getcwd(),
                "./backend/scraper/scraper_lib/test_scraper_lib/some_incorrect_words.txt"
            ))
        chunks = list(chunked(wrong_words, 10000))
        print('adding words in')
        for chunk in chunks:
            redis_client.sadd('mispelledwords', *chunk)
    # len = redis_client.sdiff('dict:all', 'mispelledwords')
    # wrong_words = [word.lower() for word in wrong_words]
    # missed = []
    # for word in wrong_words:
    #     if redis_client.sismember("dict:all", word) and re.match('^[a-z]*$', word):
    #         missed.append(word)
    assert len(redis_client.sinter('dict:all', 'mispelledwords')) == 0
Example #49
def run_sending(groups, reserve_time=None):
    # Start sms sending group by group
    for group_name, group_mobiles in groups:
        sms_text = TEXTS[group_name]
        kind = 'timed(%s)' % reserve_time if reserve_time else 'instant'

        print 'Start sending %s sms to group(%s)(%s total)' % (
            kind, group_name, len(group_mobiles))
        for seq, mobiles in enumerate(chunked(group_mobiles, 100)):
            phones = [str(m) for m in mobiles if m]
            print '**The %dth group(expect %s, actual %s)' % (
                seq, len(mobiles), len(phones))
            if not phones:
                continue

            if reserve_time:
                send_reserved_sms_via_yimei(phones, sms_text, reserve_time)
            else:
                send_instant_sms_via_yimei(phones, sms_text)
Example #50
    def from_iterator(
        cls,
        name: str,
        iterator: Iterable[str],
        batch_size: int = 64,
        overwrite: bool = False,
    ) -> Dataset:
        dataset = cls(name, overwrite=overwrite)

        dataset.data["raw"] = Raw.from_dask_array(
            common.PROJDIR / name / (name + ".raw.zarr.zip"),
            da.concatenate([
                da.from_array(np.array(chunk, dtype=np.bytes_))
                for chunk in chunked(iterator, batch_size)
            ]),
            overwrite=overwrite,
        )
        dataset.save()
        return dataset
Example #51
def generate(out):
    lst = ['DRS_fail'] * 128
    lst[ord('n')] = 'DRS_null'
    lst[ord('t')] = 'DRS_true'
    lst[ord('f')] = 'DRS_false'
    lst[ord('I')] = 'DRS_inf'
    lst[ord('N')] = 'DRS_nan'
    lst[ord('"')] = 'DRS_string'
    lst[ord("'")] = 'DRS_string'
    lst[ord('{')] = 'DRS_recursive'
    lst[ord('[')] = 'DRS_recursive'
    for c in '+-.0123456789':
        lst[ord(c)] = 'DRS_number'

    print('#ifndef JSON5EncoderCpp_decoder_recursive_select', file=out)
    print('#define JSON5EncoderCpp_decoder_recursive_select', file=out)
    print(file=out)
    print('// GENERATED FILE', file=out)
    print('// All changes will be lost.', file=out)
    print(file=out)
    print('#include <cstdint>', file=out)
    print(file=out)
    print('namespace JSON5EncoderCpp {', file=out)
    print('inline namespace {', file=out)
    print(file=out)
    print('enum DrsKind : std::uint8_t {', file=out)
    print(
        '    DRS_fail, DRS_null, DRS_true, DRS_false, DRS_inf, DRS_nan, DRS_string, DRS_number, DRS_recursive',
        file=out)
    print('};', file=out)
    print(file=out)
    print('static const DrsKind drs_lookup[128] = {', file=out)
    for chunk in chunked(lst, 8):
        print('   ', end='', file=out)
        for t in chunk:
            print(' ', t, ',', sep='', end='', file=out)
        print(file=out)
    print('};', file=out)
    print(file=out)
    print('}  // anonymous inline namespace', sep='', file=out)
    print('}  // namespace JSON5EncoderCpp', sep='', file=out)
    print(file=out)
    print('#endif', sep='', file=out)
Example #52
def polishReads(MISMATCH_RESULT, NANOPORE_READ, TEMP_DIR, FINAL_DIR,
                POLISHED_READ, THREADS, PENALTY_PATH, minimapPath, poaPath,
                raconPath, seqkitPath):
    if os.path.exists(TEMP_DIR):
        logger.warning(f"{TEMP_DIR} existed!!")
    else:
        os.mkdir(TEMP_DIR)
    if os.path.exists(FINAL_DIR):
        logger.warning(f"{FINAL_DIR} existed!!")
    else:
        os.mkdir(FINAL_DIR)

    logger.info('read mismatch results')
    mismatchResult = pd.read_feather(MISMATCH_RESULT)
    logger.info('prepare for polish')
    mismatchResult["readStrand"] = (mismatchResult["readStrand"]
                                    ^ mismatchResult["umiStrand"])
    mismatchResult.drop("umiStrand", axis=1, inplace=True)
    mismatchResult["readStrand"] = mismatchResult["readStrand"].astype(str)
    mismatchResult[
        "temp"] = mismatchResult["name"] + "_" + mismatchResult["readStrand"]
    sameUmiReadDt = mismatchResult.groupby("qseqid")["temp"].agg(
        lambda x: list(x))
    sameUmiReadDc = {i: [[k] for k in j] for i, j in sameUmiReadDt.items()}
    logger.info('start polish')
    umiReadDcIter = chunked(sameUmiReadDc.items(), 100)
    i = 0
    allResults = []
    with ProcessPoolExecutor(THREADS) as multiP:
        for umiReadDtChunk in umiReadDcIter:
            i += 1
            allResults.append(
                multiP.submit(chunkPolishSeq, umiReadDtChunk, NANOPORE_READ,
                              TEMP_DIR, FINAL_DIR, PENALTY_PATH, i,
                              minimapPath, poaPath, raconPath))
    [x.result() for x in allResults]
    logger.info('merge all polished reads')
    time.sleep(10)
    os.system(f"""
    cat {FINAL_DIR}* | {seqkitPath} seq -rp > {POLISHED_READ} && sleep 15 &&\
    rm -rf {FINAL_DIR} &&\
    rm -rf {TEMP_DIR}
    """)
Example #53
def split_vcf(vcf_path: Path, chunk_size: int) -> Iterable[Path]:
    """
    A simple utility for splitting a VCF file into chunk-sized VCF files.

    Note: all splits keep the original header.

    :param vcf_path: input vcf file.
    :param chunk_size: max number of records in each file.
    :return: paths of tmp split files.
    """
    with vcf_path.open() as vcf_io:
        records, header = partition(lambda line: line.startswith('#'), vcf_io)
        for n, chunk in enumerate(chunked(records, chunk_size)):
            header = header if isinstance(header, tuple) else tuple(header)
            with NamedTemporaryFile(mode='w', suffix='.vcf',
                                    delete=False) as out_io:
                out_io.writelines(header)
                out_io.writelines(chunk)
            yield Path(out_io.name)
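A hypothetical way to drive the generator above, splitting a VCF into temporary files of at most 1000 records and removing each one after use:

from pathlib import Path

for part in split_vcf(Path('variants.vcf'), chunk_size=1000):  # hypothetical input file
    print('wrote', part)
    part.unlink()  # delete the temporary split once it has been processed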
Example #54
def read_openpose_json(filename: str) -> List[JointDescriptor]:
    """
    Reads json files generated by OpenPose's predictor.
    
    Args:
        filename: String; Full path of the file to read.

    Output:
        Outputs of OpenPose usually contain 25 joints with their confidence.
        ```
        [
            JointDescriptor(
                x=1027.69,
                y=221.108,
                confidence=0.90927,
                joint=<OpenPoseJoints.Nose: 0>
            )
        ]
        ```
        `JointDescriptor` and `OpenPoseJoints` are both classes in this library.
    """
    with open(filename, "rb") as f:
        keypoints_list = []
        keypoints = json.load(f)
        assert (len(keypoints["people"]) == 1
                ), "In all pictures, we should have only one person!"

        points_2d = keypoints["people"][0]["pose_keypoints_2d"]
        assert (len(points_2d) == 25 *
                3), "We have 25 points with (x, y, c); where c is confidence."

        for point_index, (x, y, confidence) in enumerate(chunked(points_2d,
                                                                 3)):
            assert x is not None, "x should be defined"
            assert y is not None, "y should be defined"
            assert confidence is not None, "confidence should be defined"
            keypoints_list.append(
                JointDescriptor(x=x,
                                y=y,
                                confidence=confidence,
                                joint=OpenPoseJoints(point_index)))

        return keypoints_list
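The chunked(points_2d, 3) call is what regroups OpenPose's flat [x, y, confidence, x, y, confidence, ...] list into per-joint triples; a minimal sketch with made-up numbers:

from more_itertools import chunked

flat = [1027.69, 221.108, 0.909, 998.32, 230.5, 0.873]  # two made-up keypoints
for x, y, confidence in chunked(flat, 3):
    print(x, y, confidence)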
Example #55
    def add_data(self, values):
        logger.info("Adding data to job...")

        chunks = more_itertools.chunked(iterable=values, n=ClassificationJob.MAX_PAGE_SIZE)
        for index, chunk in enumerate(chunks):
            page_number = index + 1
            rows = self._to_labeled_rows(chunk)

            is_success = self._client.request(
                api="Classifications",
                method="PopulateImport",
                data={
                    "job_id": self.id,
                    "page": page_number,
                    "rows": rows
                }
            )
            logger.info("is_success: {}".format(is_success))
            assert is_success, "Failed to add data."
Example #56
 def get_health_checks(self) -> Mapping[str, HealthCheck]:
     paginator = self.route53.get_paginator('list_health_checks')
     hcs = chain.from_iterable(page['HealthChecks']
                               for page in paginator.paginate())
     batch_size = 10  # Route53 lets us get tags for at most ten resources at a time
     hc_batches = more_itertools.chunked(hcs, batch_size)
     dcp_hcs = {}
     for hc_batch in hc_batches:
         hc_batch = {hc['Id']: hc for hc in hc_batch}
         response = self.route53.list_tags_for_resources(
             ResourceType='healthcheck', ResourceIds=list(hc_batch.keys()))
         for tag_set in response['ResourceTagSets']:
             assert tag_set['ResourceType'] == 'healthcheck'
             for tag in tag_set['Tags']:
                 if tag['Key'] == 'Name':
                     hc_name = tag['Value']
                     hc_id = tag_set['ResourceId']
                     dcp_hcs[hc_name] = hc_batch[hc_id]
     return dcp_hcs
Example #57
def upload_pics(session, pics):
    # Google Photos only accepts up to 50 uploads at a time, so split the pictures into chunks
    # more_itertools.chunked splits an iterable into pieces of the given size, which is very handy!!
    group_by = 40
    pics_splitted = chunked(pics, group_by)

    for pics_elem in pics_splitted:
        successed_uploads = []
        # Upload the files and obtain upload tokens
        # Tokens apparently expire after one day, so this logic may need rethinking if uploads take on the order of days
        for pict_bin, file_name, description in pics_elem:
            session.headers["Content-type"] = "application/octet-stream"
            session.headers["X-Goog-Upload-Protocol"] = "raw"
            session.headers["X-Goog-Upload-File-Name"] = file_name
            upload_token = session.post(
                'https://photoslibrary.googleapis.com/v1/uploads', pict_bin)

            if upload_token.status_code == 200:
                successed_uploads.append([upload_token, description])
            else:
                print(
                    f'An error occured while uploading file "{file_name}". Response: {upload_token}'
                )

        # Build the request body for the batch call (mediaItems:batchCreate)
        batch_request_body = {"newMediaItems": []}
        for upload_token, description in successed_uploads:
            batch_request_body['newMediaItems'].append({
                "description": description,
                "simpleMediaItem": {
                    "uploadToken": upload_token.content.decode()
                }
            })

        batch_request_json = json.dumps(batch_request_body)

        result = session.post(
            'https://photoslibrary.googleapis.com/v1/mediaItems:batchCreate',
            batch_request_json)
        if result.status_code != 200:
            print(
                f'An error occured while batch creating. \nStatus code:{result.status_code} Reason: {result.reason}'
            )
Example #58
def import_insights(
    predictions: Iterable[Prediction],
    server_domain: str,
    batch_size: int = 1024,
) -> int:
    product_store = get_product_store()
    imported: int = 0

    prediction_batch: List[Prediction]
    for prediction_batch in chunked(predictions, batch_size):
        with db.atomic():
            imported += import_insights_(
                prediction_batch,
                server_domain,
                automatic=False,
                product_store=product_store,
            )

    return imported
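The same batching pattern in a generic, self-contained sketch: chunked limits how many records go into each write, and the sink function here is hypothetical:

from more_itertools import chunked

def save_batch(batch):  # hypothetical stand-in for import_insights_ plus db.atomic()
    print('saving', len(batch), 'records')

for batch in chunked(range(2500), 1024):
    save_batch(batch)  # batches of 1024, 1024, 452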
Example #59
    def handle_sentry(self, *args, **kwargs):
        existing_jobs = set(scheduled_jobs())
        target = set(UniqueFeed.objects.filter(muted=False).values_list(
            'url', flat=True))

        to_delete = existing_jobs - target
        if to_delete:
            logger.info(
                "Deleting {0} jobs from the scheduler".format(len(to_delete)))
            for job_id in to_delete:
                delete_job(job_id)

        to_add = target - existing_jobs
        if to_add:
            logger.info("Adding {0} jobs to the scheduler".format(len(to_add)))
            for chunk in chunked(to_add, 10000):
                uniques = UniqueFeed.objects.filter(url__in=chunk)
                for unique in uniques:
                    unique.schedule()
Example #60
    def handle_sentry(self, *args, **kwargs):
        connection = get_redis_connection()
        existing_jobs = set(scheduled_jobs(connection=connection))
        target = set(UniqueFeed.objects.filter(muted=False).values_list(
            'url', flat=True))

        to_delete = existing_jobs - target
        if to_delete:
            logger.info("deleting jobs from the scheduler",
                        count=len(to_delete))
            for job_id in to_delete:
                delete_job(job_id, connection=connection)

        to_add = target - existing_jobs
        if to_add:
            logger.info("adding jobs to the scheduler", count=len(to_add))
            for chunk in chunked(to_add, 10000):
                uniques = UniqueFeed.objects.filter(url__in=chunk)
                for unique in uniques:
                    unique.schedule()