Example #1
def is_base_feed(dirpath):
    """Check if a directory is a feed directory. It is affirmative if and only
    if (1) it is a directory; (2) contains at least one subdirectory named as a
    4-digit string as the feed version; (3) under the feed version directory
    there is a directory named `versions`; and (4) under `versions` there is
    some all-numeric files or directories

    Args:
        dirpath (str): Directory path to check

    Returns:
        bool for whether a directory looks like a feed directory
    """
    if not os.path.isdir(dirpath):
        return False  # not a directory
    ver_dir = (
        os.listdir(dirpath)
        | where(lambda name: len(name) == 4 and name.isdigit())  # 4-digit
        | select(lambda name: os.path.join(dirpath, name))
        | where(os.path.isdir)  # feed ver is a dir
        | select(lambda path: os.path.join(path, 'versions'))
        | where(os.path.isdir)  # data ver is a dir
        | where(lambda path: any(name.isdigit()
                                 for name in os.listdir(path)))  # all-digit
        | as_list)  # list of "feed_name/0001/versions"
    return bool(ver_dir)
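# A minimal sketch (hypothetical names) of a directory layout that satisfies
# the four checks in is_base_feed() above:
#
#   my_feed/                         <- dirpath passed in
#   my_feed/0001/                    <- 4-digit feed version directory
#   my_feed/0001/versions/           <- literal `versions` directory
#   my_feed/0001/versions/0000001    <- all-numeric data version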
Example #2
def list_feed_subfeeds(feedroot, feedname, feedver, subfeednames=()):
    """Assume the feed data directory exists. Return the list of subfeed names
    in ascending order

    Args:
        feedroot (str): root dir of the feed repository
        feedname (str): name of feed, also as the directory under feedroot
        feedver (str): feed version, also as the directory under feedname
        subfeednames (str or list of str): partial name(s) of the subfeed

    Returns:
        list of lists of strings, each a valid subfeed. If subfeednames is
        provided, only those under the provided partial subfeed name are returned
    """
    feeddir = os.path.join(feedroot, feedname, feedver)
    dirpath = os.path.join(feeddir, *subfeednames)
    if not os.path.isdir(dirpath):
        return []  # not a directory or does not exist
    subfeeds = (
        os.walk(dirpath)
        | where(lambda rootdirfile: "versions" in rootdirfile[1])
        | select(lambda rootdirfile: rootdirfile[0][len(feeddir):])
        | sort
        | select(lambda dirname: list(filter(None, dirname.split(os.sep))))
        | where(lambda dirparts: "versions" not in dirparts)
        | as_list)
    return subfeeds
Example #3
    def test_right_or(self):
        ror_piped = (range(100) | where(lambda x: x % 2 == 1)
                     | select(lambda x: x**2)
                     | select(lambda x: x - 1)
                     | where(lambda x: x < 50))

        or_pipe = (where(lambda x: x % 2 == 1) | select(lambda x: x**2)
                   | select(lambda x: x - 1)
                   | where(lambda x: x < 50))

        lror = list(ror_piped)
        lor = list(range(100) | or_pipe)
        self.assertEqual(lror, lor)
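
        # A minimal sketch of why the source-less form is useful: the same
        # composed pipe can be reused against several iterables (expected
        # values worked out by hand).
        odd_squares_under_50 = (where(lambda x: x % 2 == 1)
                                | select(lambda x: x**2)
                                | where(lambda x: x < 50))
        self.assertEqual(list(range(10) | odd_squares_under_50),
                         [1, 9, 25, 49])
        self.assertEqual(list(range(100) | odd_squares_under_50),
                         [1, 9, 25, 49])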
Example #4
def list_feed_data_vers(feedroot, feedname, feedver, subfeednames=None):
    """Assume the feed data directory exists. Return the list of data versions
    in descending order

    Args:
        feedroot (str): root dir of the feed repository
        feedname (str): name of feed, also as the directory under feedroot
        feedver (str): feed version, also as the directory under feedname
        subfeednames (str or list of str): name(s) of the subfeed

    Returns:
        list of strings, each is a valid feed version
    """
    if subfeednames:
        if isinstance(subfeednames, str):
            subfeednames = [subfeednames]
        dirpath = [feedroot, feedname, feedver] + subfeednames + ['versions']
        dirpath = os.path.join(*dirpath)
    else:
        dirpath = os.path.join(feedroot, feedname, feedver, 'versions')
    if not os.path.isdir(dirpath):
        return []  # not a directory or does not exist
    vers = (os.listdir(dirpath)
            | where(lambda name: name.isdigit())
            | sort
            | reverse
            | as_list)
    return vers
Example #5
def is_feed(dirpath):
    """Similar to is_base_feed(), but also cover feeds with subfeeds
    """
    if not os.path.isdir(dirpath):
        return False  # not a directory
    ver_dir = (
        os.listdir(dirpath)
        | where(lambda name: len(name) == 4 and name.isdigit())  # 4-digit
        | select(lambda name: os.path.join(dirpath, name))
        | where(os.path.isdir)  # feed ver is a dir
        | as_list)  # list of "feed_name/0001"
    for dirname in ver_dir:
        for root, _dirs, files in os.walk(dirname):
            if root.rsplit(os.sep, 1)[-1] == 'versions' and \
               any(name.isdigit() for name in files): # all-digit files
                return True
    return False
Example #6
    def match_faces(self, faceencod, known_faces, tol):
        known_encodes = known_faces | select(lambda f: f["encod"]) | tolist
        matches = face_recognition.compare_faces(known_encodes, faceencod,
                                                 tol)

        # Select only matched records
        return zip(matches, known_faces) \
            | where(lambda x: x[0])    \
            | select(lambda m: m[1])   \
            | tolist
Example #7
def list_feed_vers(feedroot, feedname):
    """Assume the feed directory exists. Return the list of feed versions in
    descending order

    Args:
        feedroot (str): root dir of the feed repository
        feedname (str): name of feed, also as the directory under feedroot

    Returns:
        list of strings, each is a valid feed version
    """
    dirpath = os.path.join(feedroot, feedname)
    vers = (os.listdir(dirpath)
            | where(lambda name: len(name) == 4 and name.isdigit())
            | where(lambda path: os.path.isdir(os.path.join(dirpath, path)))
            | sort
            | reverse
            | as_list)
    return vers
Example #8
def list_feeds(feedroot):
    """List out names of all feeds

    Args:
        feedroot (str): root dir of the feed repository

    Returns:
        list of strings, each is a valid feed name
    """
    assert os.path.isdir(feedroot)
    names = (os.listdir(feedroot)
             | where(lambda name: is_feed(os.path.join(feedroot, name)))
             | sort
             | as_list)
    return names
Example #9
def main(args: argparse.Namespace):
    images = list(
        sorted(
            glob.glob(
                os.path.join(args.directory, 'preview', 'image[0-9]*.png'))))

    log = json.load(open(os.path.join(
        args.directory,
        'log',
    )))

    cv_images = []
    for image_path in images:
        image = cv2.imread(image_path)
        iteration = int(re.match('.*image([0-9]+).png', image_path).group(1))

        meta = (log | pipe.where(lambda x: x['iteration'] >= iteration)
                | pipe.first)
        stage = meta['stage']

        w, h, _ = image.shape
        resolution = w // 10
        image = cv2.resize(image[:int(w // 10) * 3, :int(h // 10) * 3],
                           (args.resize, args.resize),
                           interpolation=cv2.INTER_NEAREST)

        w, h, _ = image.shape
        cv2.putText(image,
                    'stage{}'.format(int(math.floor(stage)) + 1),
                    (0, (h // 4) * 3),
                    cv2.FONT_HERSHEY_PLAIN,
                    args.resize // 60, (255, 128, 128),
                    thickness=args.resize // 30)

        cv2.putText(image,
                    '{}x{}'.format(resolution, resolution), (0, h),
                    cv2.FONT_HERSHEY_PLAIN,
                    args.resize // 60, (255, 128, 255),
                    thickness=args.resize // 30)
        cv_images.append(image)
    agif(cv_images,
         duration=args.duration,
         filename=os.path.join(
             args.directory,
             'preview.gif',
         ))
Example #10
def test_pipe():
    def fib():
        a, b = 0, 1
        while True:
            yield a
            a, b = b, a + b

    # Sum the even Fibonacci numbers below 4,000,000
    amount = fib() | where(lambda x: x % 2 == 0) | take_while(
        lambda x: x < 4000000) | add()
    print(amount)

    # Read a file, count how often each word appears, then sort the words by
    # count in descending order
    with open('argparse.py') as f:
        fs = f.read()
        print(findall(r'\w+', fs))
        print(fs
              | Pipe(lambda x: findall(r'\w+', x))
              # | Pipe(lambda x: (i for i in x if i.strip()))
              | groupby(lambda x: x)
              | select(lambda x: (x[0], (x[1] | count)))
              | sort(key=lambda x: x[1], reverse=True))
Example #11
    def test_parallelepiped(self):
        par0 = (range(100) | where(lambda x: x % 2 == 1)
                | select(lambda x: x**2)
                | select(lambda x: x - 1)
                | where(lambda x: x < 50))

        par1 = (range(100) | where(lambda x: x % 2 == 1)
                | (Parallelepiped() | select(lambda x: x**2))
                | select(lambda x: x - 1)
                | where(lambda x: x < 50))

        par2 = (range(100) | where(lambda x: x % 2 == 1)
                | (Parallelepiped() | select(lambda x: x**2)
                   | select(lambda x: x - 1))
                | where(lambda x: x < 50))

        l0 = list(par0)
        l1 = list(par1)
        l2 = list(par2)
        self.assertEqual(l0, l1)
        self.assertEqual(l0, l2)
Example #12
def filter_positive_result(res):
    result = [(i, x[0]) if x[0] > 0.65 else (-1, -1)
              for i, x in enumerate(res)]
    result = list(result | where(lambda x: x[0] >= 0))
    result.sort(key=lambda x: x[1], reverse=True)
    return result
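# A hypothetical call for illustration, assuming res is a sequence of
# one-element score lists and 0.65 is the confidence threshold used above:
#
#   filter_positive_result([[0.9], [0.2], [0.7]])  ->  [(0, 0.9), (2, 0.7)]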
Example #13
def generate(config):
    texts = {}
    docs = OrderedDict()
    for day in range(0, config.days):
        date = config.startdate - datetime.timedelta(days=day)
        label = config.indexlabel.format(**locals())
        results = OrderedDict()
        indexfilename = config.indexfile.format(**locals())
        docs[label] = indexfilename
        dayquery = copy.deepcopy(config.basequery)
        dayquery["fq"].append(
            date.strftime(
                'createdAt:[%Y-%m-%dT00:00:00.000Z TO %Y-%m-%dT23:59:59.999Z]')
        )
        for (k, n) in config.branchen.items():
            nq = copy.deepcopy(dayquery)
            nq["fq"].append('+sectors:"{0}"'.format(k))
            res = list(
                neofonie.query("*", **nq)["response"]["docs"]
                | datapipeline.rename_attributes(config.rename)
                | pipe.where(config.filter)
                | datapipeline.deduplicate(key=lambda a: a["title"])
                | datapipeline.default_attributes(('sourcelink', 'source',
                                                   'subtitle'))
                | datapipeline.call(add_sectors_to_subtitle))
            logging.debug("Sector: %s - %s - %s docs" %
                          (k, date.strftime("%Y-%m-%d"), len(res)))
            for item in res:
                logging.debug(
                    "     %s %s %s" %
                    (item["sectors"], item["title"], item["text"][:30]))
            if len(res) > 0:
                results[k] = dict(docs=res, label=n)
        for nr in results.values():
            for doc in nr["docs"]:
                filename = config.docfile.format(**locals())
                doc["document"] = filename
                ndoc = copy.deepcopy(doc)
                ndoc["index"] = os.path.join("..", indexfilename)
                ndoc["sector"] = doc["sectors"][0]
                ndoc["root"] = os.path.join("..", config.rootfile)
                ndoc["source"] = "ex neoApplication"
                ndoc["sourcelink"] = "ex neoURL"
                ndoc["subtitle"] = "Untertitel zu {}".format(
                    ndoc.get("title", "---"))
                texts[os.path.join(config.directory, filename)] = ndoc
                if "text" in doc:
                    del (doc["text"])
        with mkdirs_and_open(os.path.join(config.directory, indexfilename),
                             "w") as of:
            json.dump(
                dict(news=results,
                     root=config.rootfile,
                     rootlabel=config.rootlabel), of)
            logging.info("%s items written to %s" %
                         (reduce(lambda a, b: a + b,
                                 (len(a["docs"])
                                  for a in results.values()), 0), of.name))

    for (k, v) in texts.items():
        json.dump(v, mkdirs_and_open(k, "w"))
    logging.debug("%s news objects written" % len(list(texts.keys())))
    t = copy.deepcopy(config.template)
    t["chapters"] = docs
    json.dump(t, open(os.path.join(config.directory, config.rootfile), "w"))
Example #14
from pipe import select, where

my_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

print(list(my_list | where(lambda x: x % 2 == 0)))

updated_list = my_list \
    | select(lambda x: x * 3) \
    | where(lambda x: x % 2 == 0)

print(list(updated_list))
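# Expected output of the two prints above, worked out by hand:
#   [2, 4, 6, 8, 10]
#   [6, 12, 18, 24, 30]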
Example #15
__author__ = 'ipetrash'

## Example of using pipe module
# https://github.com/JulienPalard/Pipe
# ru: http://habrahabr.ru/post/117679/
import pipe

if __name__ == '__main__':
    print((i for i in range(10)) | pipe.as_list)  # generator to list
    print([i for i in range(10)] | pipe.as_tuple)  # list to tuple
    print(((1, 1), ('a', 2), (3, 'd')) | pipe.as_dict)  # tuple to dict

    print()
    # list of even numbers
    l = ((i for i in range(10))
         | pipe.where(lambda x: x % 2 == 0) | pipe.as_list)
    c = l | pipe.count  # count elements
    print("List: {}, count: {}".format(l, c))
    print()

    # custom pipe:
    @pipe.Pipe
    def custom_add(x):
        return sum(x)

    print([1, 2, 3, 4] | custom_add)  # = 10
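
    # A further sketch, assuming the Pipe decorator also curries extra
    # arguments before the iterable is piped in (as the library's built-in
    # take does); the `scale` pipe below is made up for illustration.
    @pipe.Pipe
    def scale(iterable, factor):
        return [x * factor for x in iterable]

    print([1, 2, 3] | scale(10))  # = [10, 20, 30]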
Example #16
from pipe import where, concat, as_list
from csv_loader import load_files
from eyecandy import print_dna, print_name

pokemon, moves = load_files()

#print_name()
#print_dna()
#print pokemon["snorlax"], pokemon["snorlax"].movepool

# Not really Pythonic, but still really awesome
print(
    pokemon.values()
    | where(lambda x: x.type1 == "Grass")
    | where(lambda x: x.type2 == "Steel")
    | as_list
)
Example #17
def main(args: argparse.Namespace, dataset):
    result_directory_name = "_".join([
        args.prefix,
        "resize{}".format(args.resize),
        "stage{}".format(args.initial_stage),
        "batch{}".format(args.batchsize),
        "stginterval{}".format(args.stage_interval),
        "latent{}".format("ON" if args.use_latent else "OFF"),
        str(int(time.time())),
    ] | pipe.where(lambda x: len(x) > 0))
    result_directory = args.out / result_directory_name
    chainer_gan_lib.common.record.record_setting(str(result_directory))

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
    generator, generator_smooth, discriminator, vectorizer = tools.load_models(
        resize=args.resize,
        use_latent=args.use_latent,
        input_channel=args.input_channel,
        pretrained_generator=args.pretrained_generator,
        pretrained_vectorizer=args.pretrained_vectorizer)

    # if args.resize == 32:
    #     channel_evolution = (512, 512, 512, 256)
    # elif args.resize == 128:
    #     channel_evolution = (512, 512, 512, 512, 256, 128)
    # elif args.resize == 256:
    #     channel_evolution = (512, 512, 512, 256, 128, 64, 32)
    # elif args.resize == 512:
    #     channel_evolution = (512, 512, 512, 512, 256, 128, 64, 32)
    # elif args.resize == 1024:
    #     channel_evolution = (512, 512, 512, 512, 256, 128, 64, 32, 16)
    # else:
    #     raise Exception()
    #
    # generator = chainer_progressive_gan.models.progressive_generator.ProgressiveGenerator(
    #     channel_evolution=channel_evolution, conditional=True)
    # generator_smooth = chainer_progressive_gan.models.progressive_generator.ProgressiveGenerator(
    #     channel_evolution=channel_evolution, conditional=True)
    # discriminator = chainer_progressive_gan.models.ProgressiveDiscriminator(
    #     pooling_comp=args.pooling_comp, channel_evolution=channel_evolution, first_channel=args.input_channel + 3)
    # vectorizer = chainer_progressive_gan.models.ProgressiveVectorizer(
    #     pooling_comp=args.pooling_comp, channel_evolution=channel_evolution, first_channel=args.input_channel,
    #     use_both_conditional_and_latent=args.use_latent)
    train_iter = chainer.iterators.MultithreadIterator(dataset, args.batchsize)

    # select GPU
    if args.gpu >= 0:
        generator.to_gpu()
        generator_smooth.to_gpu()
        discriminator.to_gpu()
        vectorizer.to_gpu()
        print("use gpu {}".format(args.gpu))

    chainer_gan_lib.common.misc.copy_param(generator_smooth, generator)
    opt_gen = train.make_optimizer(generator)
    opt_dis = train.make_optimizer(discriminator)
    opt_vec = train.make_optimizer(vectorizer)

    updater = chainer_progressive_gan.updaters.ConditionalProgressiveUpdater(
        resolution=args.resize,
        models=(vectorizer, generator, discriminator, generator_smooth),
        iterator={'main': train_iter},
        optimizer={
            'opt_vec': opt_vec,
            'opt_gen': opt_gen,
            'opt_dis': opt_dis
        },
        device=args.gpu,
        n_dis=args.n_dis,
        lam=args.lam,
        gamma=args.gamma,
        smoothing=args.generator_smoothing,
        initial_stage=args.initial_stage,
        stage_interval=args.stage_interval)
    report_keys = [
        "stage", "loss_dis", "loss_gp", "loss_gen", "g", "inception_mean",
        "inception_std", "FID"
    ]
    trainer = chainer.training.Trainer(updater, (args.max_iter, 'iteration'),
                                       out=str(result_directory))
    trainer.extend(chainer.training.extensions.snapshot_object(
        generator, 'generator_{.updater.iteration}.npz'),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        generator_smooth, 'generator_smooth_{.updater.iteration}.npz'),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        discriminator, 'discriminator_{.updater.iteration}.npz'),
                   trigger=(args.snapshot_interval, 'iteration'))
    trainer.extend(chainer.training.extensions.snapshot_object(
        vectorizer, 'vectorizer_{.updater.iteration}.npz'),
                   trigger=(args.snapshot_interval, 'iteration'))

    trainer.extend(
        chainer.training.extensions.LogReport(keys=report_keys,
                                              trigger=(args.display_interval,
                                                       'iteration')))
    trainer.extend(chainer.training.extensions.PrintReport(report_keys),
                   trigger=(args.display_interval, 'iteration'))
    trainer.extend(
        chainer_progressive_gan.training.GenerateSampleWithCondition(
            vectorizer,
            generator,
            input_dataset=dataset,
            output_dir=result_directory,
            rows=3,
            cols=3),
        trigger=(args.out_image_interval, 'iteration'))
    # trainer.extend(sample_generate(generator_smooth, result_directory),
    #                trigger=(args.out_image_interval, 'iteration'),
    #                priority=extension.PRIORITY_WRITER)
    # trainer.extend(sample_generate_light(generator_smooth, result_directory),
    #                trigger=(args.evaluation_interval // 10, 'iteration'),
    #                priority=extension.PRIORITY_WRITER)
    # trainer.extend(calc_inception(generator_smooth), trigger=(args.evaluation_interval, 'iteration'),
    #                priority=extension.PRIORITY_WRITER)
    # trainer.extend(calc_FID(generator_smooth), trigger=(args.evaluation_interval, 'iteration'),
    #                priority=extension.PRIORITY_WRITER)
    trainer.extend(chainer.training.extensions.ProgressBar(update_interval=10))
    trainer.run()
Example #18
def test_pipe():
    print(range(5) | add)
    print(range(5) | where(lambda x: x % 2 == 0) | add)
    print(fibonacci() | where(lambda x: x % 2 == 0) | take_while(lambda x: x < 10000) | add)
    print(fibonacci() | select(lambda x: x ** 2) | take_while(lambda x: x < 100) | as_list)
    print(fibonacci() | take_while_idx(lambda x: x < 10) | as_list)
Example #19
    fx.write_videofile(nfn)
    clip.close()

def load_resize(x):
    fn = x['filename']
    nfn = fn.replace('.full.mp4','.resized.mp4')
    x['filename'] = nfn
    if os.path.isfile(nfn):
        print("Loading resized {}".format(nfn))
        vc = VideoFileClip(nfn)
        return vc
    else:
        print("Resizing {}".format(fn))
        vc = VideoFileClip(fn).fx(vfx.resize, width=video_width)
        vc.write_videofile(nfn)
        return vc
    
def resize(x):
    v = VideoFileClip(x)
    vfxc = v.fx(vfx.resize, width=video_width)
    return (v, vfxc)

if __name__ == "__main__":
    (mp.get_datastream(data_dir,ext=".full.mp4")
     | where( lambda f: not os.path.isfile( f['filename'].replace(".full.mp4",".resized.mp4") ) )
     | mp.apply('filename','video', resize )
     | cachecomputex(".full.mp4",".resized.mp4",resize_save,lambda x,nx: print("Skipping {}".format(x['filename'])))
     | execute
    )

Example #20
args = parser.parse_args()

GENERATOR_INPUT_DIMENTIONS = 100
outdirname = "_".join([
                          args.outprefix,
                          "finetune" if args.pretrained_generator is not None else "",
                          "batch{}".format(args.batchsize),
                          "accthresh" if args.use_accuracy_threshold else "",
                          "withvec" if args.use_vectorizer else "",
                          "disable_gen_train" if not args.generator_training else "",
                          "finetunevectorizer" if args.pretrained_vectorizer is not None else "",
                          "withvectrain" if args.vectorizer_training_dataset is not None else "",
                          "withattributeclassifier" if args.classifier_training_attribute_dataset is not None else "",
                          "withtrain" if args.vectorizer_training_dataset is not None else "",
                          str(int(time.time())),
                      ] | pipe.where(lambda x: len(x) > 0))

OUTPUT_DIRECTORY = os.path.join(os.path.dirname(__file__), "..", "output", outdirname)
os.makedirs(OUTPUT_DIRECTORY)

logging.basicConfig(filename=os.path.join(OUTPUT_DIRECTORY, "log.txt"), level=logging.DEBUG)
console = logging.StreamHandler()
logging.getLogger('').addHandler(console)

logging.info(args)
if args.pretrained_generator is not None:
    logging.info("pretrained_generator: {}".format(os.path.abspath(args.pretrained_generator)))
if args.pretrained_discriminator is not None:
    logging.info("pretrained_discriminator: {}".format(os.path.abspath(args.pretrained_discriminator)))
if args.pretrained_vectorizer is not None:
    logging.info("pretrained_vectorizer: {}".format(os.path.abspath(args.pretrained_vectorizer)))
Example #21
from pipe import select, where, chain, traverse, groupby, dedup
arr = [1, 2, 3, 4, 5]
print(list(map(lambda x: x * 2, filter(lambda x: x % 2 == 0, arr))))
print(list(arr | where(lambda x: x % 2 == 0) | select(lambda x: x * 2)))

print(list(arr | select(lambda x: x * 2)))
nested = [[1, 2, [3]], [4, 5]]
print(list(nested | chain))
print(list(nested | traverse))
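# For reference (under the usual pipe semantics: chain flattens one level,
# traverse flattens recursively), the two prints above yield:
#   [1, 2, [3], 4, 5]
#   [1, 2, 3, 4, 5]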
fruits = [
    {
        "name": "apple",
        "price": [2, 5]
    },
    {
        "name": "orange",
        "price": 4
    },
    {
        "name": "grape",
        "price": 5
    },
]
print(list(fruits | select(lambda fruit: fruit["price"]) | traverse))
print(
    list((1, 2, 3, 4, 5, 6, 7, 8, 9)
         | groupby(lambda x: "Even" if x % 2 == 0 else "Odd")
         | select(lambda x: {x[0]: list(x[1])})))
print(
    list((1, 2, 3, 4, 5, 6, 7, 8, 9)
         | groupby(lambda x: "Even" if x % 2 == 0 else "Odd")