Example #1
0
def test_ignore():
    """Check that ``ignore`` is truthy for the pickled Paris film-locations fixture.

    The fixture is unpickled from a path relative to the current working
    directory and used as the ``download`` of a dataset whose ``fields``
    list is empty.
    """
    fixture = os.path.join(
        'blizzard',
        'test',
        'fixture',
        'lieux-de-tournage-de-films-long-metrage-paris.p',
    )
    with open(fixture, 'rb') as handle:
        download = pickle.load(handle)

    dataset = {'fields': [], 'download': download}
    n.assert_true(ignore(dataset))
Example #2
0
def index_threaded(fp_out):
    """Snowflake datasets in a process pool and write each result as a JSON line.

    Datasets are pulled lazily from ``pluplusch``; those for which
    ``u.ignore`` is true are skipped. At most ``max_pending`` jobs are in
    flight at once, each running ``meta.snowflake`` in a pool of four worker
    processes. Results are written to ``fp_out`` as they complete, so the
    output order may differ from the input order.

    Despite the name, the work runs in processes rather than threads.

    :param fp_out: writable text file-like object; receives one
        ``json.dumps`` line per snowflaked dataset.
    :raises Exception: anything raised by ``meta.snowflake`` in a worker is
        re-raised here by ``future.result()``.
    """
    # Local import: only this function needs these names.
    from concurrent.futures import FIRST_COMPLETED, wait

    max_pending = 5
    datasets = pluplusch(catalogs=dl.catalogs, cache_dir=datadir, proxies=proxies())
    futures = {}
    exhausted = False

    with ProcessPoolExecutor(4) as e:
        while True:
            # Top up the queue until it is full or the source runs dry.
            # Ignored datasets do not occupy a slot, so keep pulling.
            while not exhausted and len(futures) < max_pending:
                try:
                    dataset = next(datasets)
                except StopIteration:
                    exhausted = True
                else:
                    if not u.ignore(dataset):
                        key = (dataset["catalog"], dataset["datasetid"])
                        futures[key] = e.submit(meta.snowflake, dataset)

            # The refill loop above only stops short of a full queue once the
            # source is exhausted, so an empty queue here means all work is done.
            if not futures:
                break

            # Block until at least one job finishes instead of busy-polling.
            wait(list(futures.values()), return_when=FIRST_COMPLETED)

            # Iterate over a snapshot because finished entries are deleted.
            for key, future in list(futures.items()):
                if future.done():
                    dataset = future.result()
                    fp_out.write(json.dumps(dataset) + "\n")
                    del futures[key]
                    logger.debug("In line for snowflaking: %s", futures.keys())
Example #3
0
def index(fp_out):
    """Snowflake every non-ignored dataset in turn and write it as a JSON line.

    Datasets come from ``pluplusch``; any for which ``u.ignore`` is true are
    skipped. ``meta.snowflake`` is called on each remaining dataset (its
    return value is not used) and the dataset is then serialised with
    ``json.dumps`` and written to ``fp_out`` followed by a newline.

    :param fp_out: writable text file-like object.
    """
    source = pluplusch(catalogs=dl.catalogs, cache_dir=datadir, proxies=proxies())
    for dataset in source:
        if u.ignore(dataset):
            continue
        meta.snowflake(dataset)
        line = json.dumps(dataset) + "\n"
        fp_out.write(line)