Example #1
def is_base_feed(dirpath):
    """Check if a directory is a feed directory. It is affirmative if and only
    if (1) it is a directory; (2) contains at least one subdirectory named as a
    4-digit string as the feed version; (3) under the feed version directory
    there is a directory named `versions`; and (4) under `versions` there is
    some all-numeric files or directories

    Args:
        dirpath (str): Directory path to check

    Returns:
        bool for whether a directory looks like a feed directory
    """
    if not os.path.isdir(dirpath):
        return False  # not a directory
    ver_dir = (
        os.listdir(dirpath)
        | where(lambda name: len(name) == 4 and name.isdigit())  # 4-digit
        | select(lambda name: os.path.join(dirpath, name))
        | where(os.path.isdir)  # feed ver is a dir
        | select(lambda path: os.path.join(path, 'versions'))
        | where(os.path.isdir)  # data ver is a dir
        | where(lambda path: any(name.isdigit()
                                 for name in os.listdir(path)))  # all-digit
        | as_list)  # list of "feed_name/0001/versions"
    return False if not ver_dir else True
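A minimal usage sketch (not from the original source), assuming a hypothetical layout built in a temporary directory, showing a tree that is_base_feed() above should accept and one it should reject:

import os
import tempfile

with tempfile.TemporaryDirectory() as feed:
    # hypothetical layout: <feed>/0001/versions/20200101
    os.makedirs(os.path.join(feed, "0001", "versions", "20200101"))
    print(is_base_feed(feed))                        # expected: True
    print(is_base_feed(os.path.join(feed, "0001")))  # expected: False, no 4-digit subdir here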
Example #2
def list_feed_subfeeds(feedroot, feedname, feedver, subfeednames=()):
    """Assume the feed data directory exists. Return the list of subfeed names
    in ascending order

    Args:
        feedroot (str): root dir of the feed repository
        feedname (str): name of feed, also as the directory under feedroot
        feedver (str): feed version, also as the directory under feedname
        subfeednames (list): list of strings giving the partial name components of the subfeed

    Returns:
        list of lists of strings, each a valid subfeed. If subfeednames is
        provided, only those under the provided partial subfeed name are returned
    """
    feeddir = os.path.join(feedroot, feedname, feedver)
    dirpath = os.path.join(feeddir, *subfeednames)
    if not os.path.isdir(dirpath):
        return []  # not a directory or not exists
    subfeeds = (
        os.walk(dirpath)
        | where(lambda rootdirfile: "versions" in rootdirfile[1])
        | select(lambda rootdirfile: rootdirfile[0][len(feeddir):])
        | sort
        | select(lambda dirname: list(filter(None, dirname.split(os.sep))))
        | where(lambda dirparts: "versions" not in dirparts)
        | as_list)
    return subfeeds
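A small usage sketch, again with a hypothetical tree (the names myfeed, sub_a and sub_b/nested are invented), showing what list_feed_subfeeds() returns:

import os
import tempfile

with tempfile.TemporaryDirectory() as feedroot:
    # hypothetical layout: <feedroot>/myfeed/0001/{sub_a, sub_b/nested}/versions
    os.makedirs(os.path.join(feedroot, "myfeed", "0001", "sub_a", "versions"))
    os.makedirs(os.path.join(feedroot, "myfeed", "0001", "sub_b", "nested", "versions"))
    print(list_feed_subfeeds(feedroot, "myfeed", "0001"))
    # expected: [['sub_a'], ['sub_b', 'nested']]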
def finalize_data_preparation(total_seq, total_data_percent):
    total_data_size = int(len(total_seq) * total_data_percent / 100)
    training_data_size = int(total_data_size * 60 / 100)

    random.shuffle(total_seq)

    training_data = np.asarray(
        list(total_seq[:training_data_size] | select(lambda x: x[1])))
    training_labels = np.asarray(
        list(total_seq[:training_data_size] | select(lambda x: x[0])))
    test_data = np.asarray(
        list(total_seq[training_data_size:total_data_size]
             | select(lambda x: x[1])))
    test_labels = np.array(
        list(total_seq[training_data_size:total_data_size]
             | select(lambda x: x[0])))
    # VISUALIZATION
    print(f'training seq {len(training_data)}:')
    print(training_data[0])
    print(f'test seq {len(test_data)}:')
    print(test_data[0])
    return {
        'training_data': training_data,
        'training_labels': training_labels,
        'test_data': test_data,
        'test_labels': test_labels
    }
Example #4
def write_to_file(output_file, prices):
    file_exists = os.path.isfile(output_file)
    with open(output_file, "a") as f:
        if not file_exists:
            f.write(",".join(sorted(prices[0].keys())) + "\n")
        for p in prices:
            sorted_values = list(p.items()) | sort() | select(lambda x: x[1])
            f.write((sorted_values | select(str) | join(",")) + "\n")
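A brief usage sketch with made-up data (assuming the sort, select and join pipes used above are importable): because the header uses sorted keys and each row sorts its items by key, columns and values stay aligned.

prices = [{"ask": 101.5, "bid": 101.2, "symbol": "ABC"}]  # hypothetical records
write_to_file("prices.csv", prices)                       # hypothetical output path
# resulting prices.csv:
# ask,bid,symbol
# 101.5,101.2,ABC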
Example #5
    def match_faces(self, faceencod, known_faces, tol):
        known_encodes = known_faces | select(lambda f: f["encod"]) | tolist
        matches = face_recognition.compare_faces(known_encodes, faceencod, tol)

        # Select only matched records
        return zip(matches, known_faces) \
            | where(lambda x: x[0])    \
            | select(lambda m: m[1])   \
            | tolist
def balancedSignalGenerator(X,y,num_classes=12):
    
    class_map = {}
    for c in range(num_classes):
        class_map[c] = list(np.where( y==c)[0])
                            
    D = (range(num_classes)
         | select(lambda c: class_map[c] | pcycle | select(lambda i: (c, X[i])))
         | as_list)

    while True:
        for c in D:
            label, sample = next(c)  # advance each class generator once per yielded item
            yield label, sample
Example #7
    def test_right_or(self):
        ror_piped = (range(100) | where(lambda x: x % 2 == 1)
                     | select(lambda x: x**2)
                     | select(lambda x: x - 1)
                     | where(lambda x: x < 50))

        or_pipe = (where(lambda x: x % 2 == 1) | select(lambda x: x**2)
                   | select(lambda x: x - 1)
                   | where(lambda x: x < 50))

        lror = list(ror_piped)
        lor = list(range(100) | or_pipe)
        self.assertEqual(lror, lor)
Example #8
    def __init__(self):
        self.face_database = os.environ.get("FACE_DATABASE", "")
        self.match_tol = float(os.environ.get("FACE_MATCH_TOL", 0.6))

        self.known_faces = self.load_known_faces()
        known_faces_names = self.known_faces | select(
            lambda f: f["name"]) | tolist
        logger.debug(f"known faces: {known_faces_names}")

        super().__init__()
def prepare_data(names, last_names, names_max_len, genders, dates,
                 total_data_percent):
    data_seq = normalize_merge_data(names, last_names, names_max_len, genders,
                                    dates)

    rev_data_seq = data_seq.copy()
    rev_data_seq.reverse()

    shifted_data_seq = data_seq.copy()
    shifted_data_seq.insert(0, shifted_data_seq.pop())

    total_seq = prepare_seq(data_seq, data_seq, 1)
    total_seq += prepare_seq(data_seq, (data_seq | select(randomize)), 1)
    total_seq += prepare_seq(data_seq, (data_seq | select(randomize)), 1)
    total_seq += prepare_seq(data_seq, (data_seq | select(randomize)), 1)
    total_seq += prepare_seq(data_seq, (data_seq | select(randomize)), 1)
    total_seq += prepare_seq(data_seq, rev_data_seq, 0)
    total_seq += prepare_seq(data_seq, shifted_data_seq, 0)

    # total_seq += list(map(lambda x: (x[0], [x[1][3], x[1][4],
    #                   x[1][5], x[1][0], x[1][1], x[1][2]]), total_seq))

    return finalize_data_preparation(total_seq, total_data_percent)
Example #10
def is_feed(dirpath):
    """Similar to is_base_feed(), but also cover feeds with subfeeds
    """
    if not os.path.isdir(dirpath):
        return False  # not a directory
    ver_dir = (
        os.listdir(dirpath)
        | where(lambda name: len(name) == 4 and name.isdigit())  # 4-digit
        | select(lambda name: os.path.join(dirpath, name))
        | where(os.path.isdir)  # feed ver is a dir
        | as_list)  # list of "feed_name/0001"
    for dirname in ver_dir:
        for root, _dirs, files in os.walk(dirname):
            if root.rsplit(os.sep, 1)[-1] == 'versions' and \
               any(name.isdigit() for name in files): # all-digit files
                return True
    return False
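A sketch contrasting the two checks on a hypothetical feed with a subfeed: is_feed() accepts it because a versions directory deeper in the tree holds an all-digit file, while is_base_feed() rejects it because versions is not directly under the 4-digit version directory.

import os
import tempfile

with tempfile.TemporaryDirectory() as feed:
    # hypothetical layout: <feed>/0001/sub_a/versions/20200101 (a file)
    os.makedirs(os.path.join(feed, "0001", "sub_a", "versions"))
    open(os.path.join(feed, "0001", "sub_a", "versions", "20200101"), "w").close()
    print(is_feed(feed))       # expected: True
    print(is_base_feed(feed))  # expected: False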
Example #11
    def handle_msg(self, msg):
        matches = []
        for new_face in msg.faces:
            # Converting byte format back to NumPy array
            new_face = np.frombuffer(new_face)
            logger.debug(f"type of new_face: {type(new_face)}")
            matches.extend(
                self.match_faces(new_face, self.known_faces, self.match_tol))

        if matches:
            titles = matches | select(lambda m: m["name"]) | tolist
            msg.matched_faces.extend(titles)
            logger.debug(f"match found: {titles}")
            yield True, msg
        else:
            logger.debug("New face found. Updating the database...")
            save_image_data_to_jpg(msg.raw_frame.image_bytes,
                                   outpath=self.face_database)
            self.known_faces = self.load_known_faces()
Example #12
def test_pipe():
    def fib():
        a, b = 0, 1
        while True:
            yield a
            a, b = b, a + b

    # Sum the even Fibonacci numbers below 4,000,000
    amount = fib() | where(lambda x: x % 2 == 0) | take_while(
        lambda x: x < 4000000) | add()
    print(amount)

    # Read a file, count how many times each word occurs, then sort the words from highest to lowest count
    with open('argparse.py') as f:
        fs = f.read()
        print(findall('\w+', fs))
        print(fs
              | Pipe(lambda x: findall('\w+', x))
              # | Pipe(lambda x: (i for i in x if i.strip()))
              | groupby(lambda x: x)
              | select(lambda x: (x[0], (x[1] | count)))
              | sort(key=lambda x: x[1], reverse=True))
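A self-contained sketch of the same word-count pattern on an in-memory string, avoiding the file read; Pipe, groupby, select and sort come from the pipe library as above, and the sample text is made up.

from re import findall
from pipe import Pipe, groupby, select, sort

text = "to be or not to be"
print(text
      | Pipe(lambda s: findall(r'\w+', s))                      # split into words
      | groupby(lambda w: w)                                    # group identical words
      | select(lambda kv: (kv[0], sum(1 for _ in kv[1])))       # (word, count)
      | sort(key=lambda kv: kv[1], reverse=True))
# expected: [('be', 2), ('to', 2), ('not', 1), ('or', 1)]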
Example #13
    def test_parallelepiped(self):
        par0 = (range(100) | where(lambda x: x % 2 == 1)
                | select(lambda x: x**2)
                | select(lambda x: x - 1)
                | where(lambda x: x < 50))

        par1 = (range(100) | where(lambda x: x % 2 == 1)
                | (Parallelepiped() | select(lambda x: x**2))
                | select(lambda x: x - 1)
                | where(lambda x: x < 50))

        par2 = (range(100) | where(lambda x: x % 2 == 1)
                | (Parallelepiped() | select(lambda x: x**2)
                   | select(lambda x: x - 1))
                | where(lambda x: x < 50))

        l0 = list(par0)
        l1 = list(par1)
        l2 = list(par2)
        self.assertEqual(l0, l1)
        self.assertEqual(l0, l2)
Example #14
from pipe import select, where

my_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

print(list(my_list | where(lambda x: x % 2 == 0)))

updated_list = my_list \
    | select(lambda x: x * 3) \
    | where(lambda x: x % 2 == 0)

print(list(updated_list))
def date_to_str_sequence(seq):
    return list(seq | select(lambda x: x.strftime("%Y%m%d")))
Example #16
    discriminator.to_gpu()
    if args.use_vectorizer:
        vectorizer.to_gpu()
    if args.classifier_training_attribute_dataset is not None:
        classifier.to_gpu()

updater = kawaii_creator.updaters.Updater(
    generator=generator, discriminator=discriminator, xp=xp, batchsize=batchsize,
    generator_input_dimentions=GENERATOR_INPUT_DIMENTIONS)
if args.use_vectorizer:
    vectorizer_updater = kawaii_creator.updaters.VectorizerUpdater(vectorizer)
if args.classifier_training_attribute_dataset is not None:
    classifier_updater = kawaii_creator.updaters.ClassifierUpdater(classifier)

count_processed, sum_loss_discriminator, sum_loss_generator, sum_accuracy, sum_loss_classifier, sum_accuracy_classifier = 0, 0, 0, 0, 0, 0
for batch in iterator | pipe.select(xp.array) | pipe.select(chainer.Variable):
    loss_generator = chainer.Variable(xp.zeros((), dtype=xp.float32))
    loss_discriminator = chainer.Variable(xp.zeros((), dtype=xp.float32))
    loss_vectorizer = chainer.Variable(xp.zeros((), dtype=xp.float32))
    loss_classifier = chainer.Variable(xp.zeros((), dtype=xp.float32))

    if args.generator_training:
        # forward
        generated, random_seed = updater.generate_random()
        discriminated_from_generated = updater.discriminator(generated)
        discriminated_from_dataset = updater.discriminator(batch)
        accuracy = updater.discriminator_accuracy(discriminated_from_generated=discriminated_from_generated,
                                                  discriminated_from_dataset=discriminated_from_dataset)
        sum_accuracy += chainer.cuda.to_cpu(accuracy.data)  # update generator
        loss_generator_each = updater.loss_generator(discriminated_from_generated=discriminated_from_generated)
        loss_generator += loss_generator_each
Example #17
File: fonctors.py  Project: gitthious/ADT
 def test_fonctor(self):
     self.assertFEqual(("a", "bb", "ccc") | select(lambda s: len(s)),
                       (1, 2, 3))
def ConvertIndexToLabel(indexes):
    return list( indexes ) | select( lambda i: imageTypesInverted[i] ) | as_list 
    
data = balancedSignalGenerator(X_train,y_train) | take(4200) | as_list

# note that we need as_list on the data
X_train_bal = data | select(lambda el: el[1])  | as_list | as_npy
y_train_bal = data | select(lambda el: el[0])  | as_list | as_npy


#%%
print( X_train_bal.shape, y_train_bal.shape )

#%% [markdown]
# And we are now balanced!

#%%
def labelDist(y, title):
    plt.figure()
    plt.hist(y,bins=12)
    plt.title(title)
    plt.xticks(range(12),imageTypes.keys(), rotation='vertical')
Example #20
def test_pipe():
    print range(5) | add
    print range(5) | where(lambda x: x % 2 == 0) | add
    print fibonacci() | where(lambda x: x % 2 == 0) | take_while(lambda x: x < 10000) | add
    print fibonacci() | select(lambda x: x ** 2) | take_while(lambda x: x < 100) | as_list
    print fibonacci() | take_while_idx(lambda x: x < 10) | as_list
Example #21
import argparse
import pathlib
import sys
import pipe
import glob

thisfilepath = pathlib.Path(__file__)
sys.path.append(str(thisfilepath.parent.parent.parent))
import chainer_progressive_gan

import train_conditional

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset_glob')
    parser.add_argument('--edge', action=argparse._StoreTrueAction)
    train_conditional.shared_args(parser)
    args = parser.parse_args()
    args.prefix = "edge2img" + ("_edge" if args.edge else "")

    paths = glob.glob(args.dataset_glob) | pipe.select(
        pathlib.Path) | pipe.as_list
    if args.edge:
        dataset = chainer_progressive_gan.datasets.Edge2ImgDataset(
            paths, resize=(args.resize, args.resize))
    else:
        dataset = chainer_progressive_gan.datasets.Sketch2ImgDataset(
            paths, resize=(args.resize, args.resize))
    train_conditional.main(args, dataset)
Example #22
def work(in_train_arch,
         in_test_arch,
         in_train_csv,
         in_test_csv,
         out_h5):

    from pypipes import unzip,as_key,del_key,getitem,setitem
    from nppipes import (genfromtxt,
                         place,astype,as_columns,label_encoder,fit_transform,
                         transform,stack
                         )
    from nppipes import take as np_take
    from numpy.core.defchararray import strip
    from numpy import s_,mean,in1d,putmask
    from collections import Counter
    from h5pipes import h5new


    @P.Pipe
    def replace_missing_with(iterable, ftor):
        from numpy import isnan
        for item in iterable:
            for i in range(item.shape[1]):
                mask = isnan(item[:, i])
                value = ftor(item[~mask, i])
                item[mask, i] = value
                pass
            yield item


    missing_cidx = [11, 14, 16, 28, 33, 34, 35, 36, 37, 46, 51, 60, 68]
    unseen_nominal_cidx = [2, 12, 38, 69, 74]
    seen_nominal_cidx = [0, 1, 4, 5, 6, 13, 15, 17, 18, 19, 20, 21, 22, 23,
                 24, 25, 26, 27, 29, 30, 31, 32, 39, 40, 41, 42, 43, 44, 45,
                 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59,
                 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 75, 76, 77]
    nominal_cidx = seen_nominal_cidx + unseen_nominal_cidx


    data = (
        in_train_arch
        | unzip(in_train_csv)
        | genfromtxt(delimiter=',', dtype=str)
        | place(lambda d: d == '', 'nan')
        | as_key('train')
        | as_key('train_col_names', lambda d: strip(d['train'][0], '"'))
        | as_key('train_labels',    lambda d: d['train'][1:, 0].astype(int))
        | as_key('train_X',         lambda d: d['train'][1:, 1:-1])
        | as_key('train_y',         lambda d: d['train'][1:, -1].astype(int))
        | del_key('train')


        | as_key('test', lambda d:
                in_test_arch
                | unzip(in_test_csv)
                | genfromtxt(delimiter=',', dtype=str)
                | place(lambda d: d == '', 'nan')
                | P.first
                )
        | as_key('test_col_names', lambda d: strip(d['test'][0], '"'))
        | as_key('test_labels',    lambda d: d['test'][1:, 0].astype(int))
        | as_key('test_X',         lambda d: d['test'][1:, 1:])
        | del_key('test')

        | as_key('train_X', lambda d:
                (d['train_X'],)
                | np_take(missing_cidx, axis=1)
                | astype(float)

                | replace_missing_with(mean)

                | astype(str)
                | setitem(d['train_X'].copy(), s_[:, missing_cidx])
                | P.first
                )

        | as_key('label_encoders', lambda d:
                len(nominal_cidx)
                | label_encoder
                | P.as_tuple
                )

        | as_key('train_X', lambda d:
                (d['train_X'],)
                | np_take(nominal_cidx, axis=1)
                | as_columns
                | fit_transform(d['label_encoders'])
                | stack(axis=1)
                | setitem(d['train_X'].copy(), s_[:, nominal_cidx])
                | P.first
                )

        | as_key('test_X', lambda d:
                (d['test_X'],)
                | np_take(seen_nominal_cidx, axis=1)
                | as_columns
                | transform(d['label_encoders'][:-len(unseen_nominal_cidx)])
                | stack(axis=1)
                | setitem(d['test_X'].copy(), s_[:, seen_nominal_cidx])
                | P.first
                )

        | as_key('test_X', lambda d:
                (d['test_X'],)
                | np_take(unseen_nominal_cidx, axis=1)
                | as_key('test_unseen_nominals_features')

                | as_key('test_unseen_nominals', lambda d2:
                        zip(d2['test_unseen_nominals_features'].T,
                            d['label_encoders'][-len(unseen_nominal_cidx):])
                        | P.select(lambda t: list(set(t[0]) - set(t[1].classes_)))
                        | P.as_list
                        )

                | as_key('train_most_common_nominals', lambda d2:
                        zip(d['train_X'][:, unseen_nominal_cidx].T.astype(int),
                            d['label_encoders'][-len(unseen_nominal_cidx):])
                        | P.select(lambda t: t[1].inverse_transform(t[0]))
                        | P.select(lambda s: Counter(s).most_common(1)[0][0])
                        | P.as_list
                        )

                | as_key('test_corrected_features', lambda d2:
                        zip(d2['test_unseen_nominals_features'].copy().T,
                            d2['test_unseen_nominals'],
                            d2['train_most_common_nominals'])
                        | P.select(lambda t: putmask(t[0], in1d(t[0], t[1]), t[2]) or t[0].T)
                        | stack(axis=1)
                        | P.first
                        )

                | getitem('test_corrected_features')
                | as_columns
                | transform(d['label_encoders'][-len(unseen_nominal_cidx):])
                | stack(axis=1)
                | setitem(d['test_X'].copy(), s_[:, unseen_nominal_cidx])
                | P.first
                )

        | del_key('label_encoders')

        | as_key('test_X', lambda d:
                (d['test_X'],)
                | np_take(missing_cidx, axis=1)
                | astype(float)

                | replace_missing_with(mean)

                | astype(str)
                | setitem(d['test_X'].copy(), s_[:, missing_cidx])
                | P.first
                )

        | P.first
        )

    #print(data.keys())

    (
        (out_h5,)
        | h5new
        | as_key('train_X',         lambda _: data['train_X'].astype(float))
        | as_key('train_y',         lambda _: data['train_y'].astype(float))
        | as_key('test_X',          lambda _: data['test_X'].astype(float))
        | as_key('train_labels',    lambda _: data['train_labels'])
        | as_key('test_labels',     lambda _: data['test_labels'])
        | P.first
    )

    return
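For reference, a minimal, self-contained sketch (not from the project above) of defining a custom pipe with the @Pipe decorator, analogous to the replace_missing_with helper:

from pipe import Pipe

@Pipe
def scale(iterable, factor):
    # multiply every item by a fixed factor
    for item in iterable:
        yield item * factor

print(list([1, 2, 3] | scale(10)))  # expected: [10, 20, 30]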
def to_padded_int_sequence(seq, padding_max_len):
    padded_int_seq = pad_sequences(
        list(seq | select(lambda x: list(x | select(ord)))),
        maxlen=padding_max_len)
    return padded_int_seq
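A tiny sketch, with made-up strings, of just the ord-mapping step above, leaving out the pad_sequences dependency:

from pipe import select

names = ["ana", "bo"]
print(list(names | select(lambda s: list(s | select(ord)))))
# expected: [[97, 110, 97], [98, 111]]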
Example #24
# -*- coding: utf-8 -*-
import csv
import argparse
import os
import pipe
import texmex_python

parser = argparse.ArgumentParser()
parser.add_argument("csv_path")
parser.add_argument("fvec_path")
args = parser.parse_args()
assert not os.path.exists(args.fvec_path)

writer = texmex_python.Writer(args.fvec_path, 'f')

with open(args.csv_path) as fr:
    reader = csv.reader(fr)
    for vec in reader | pipe.select(lambda vec: list(map(float, vec))):
        writer.write(vec)
Example #25
 def __init__(self, tsv_path):
     self._data = numpy.array([
         line.split(" ") | pipe.select(lambda x: x == "1")
         | pipe.select(int) | pipe.as_list for line in open(tsv_path)
     ])
Example #26
from pipe import select, where, chain, traverse, groupby, dedup
arr = [1, 2, 3, 4, 5]
print(list(map(lambda x: x * 2, filter(lambda x: x % 2 == 0, arr))))
print(list(arr | where(lambda x: x % 2 == 0) | select(lambda x: x * 2)))

print(list(arr | select(lambda x: x * 2)))
nested = [[1, 2, [3]], [4, 5]]
print(list(nested | chain))
print(list(nested | traverse))
fruits = [
    {
        "name": "apple",
        "price": [2, 5]
    },
    {
        "name": "orange",
        "price": 4
    },
    {
        "name": "grape",
        "price": 5
    },
]
print(list(fruits | select(lambda fruit: fruit["price"]) | traverse))
print(
    list((1, 2, 3, 4, 5, 6, 7, 8, 9)
         | groupby(lambda x: "Even" if x % 2 == 0 else "Odd")
         | select(lambda x: {x[0]: list(x[1])})))
print(
    list((1, 2, 3, 4, 5, 6, 7, 8, 9)
         | groupby(lambda x: "Even" if x % 2 == 0 else "Odd")