Code Example #1
def test_delete_elem_but_property_always_hold(xs_x):
    xs, x = xs_x
    dyn_arr = dynamic_array(xs)

    prev_len = len(F.lflatten(dyn_arr))
    delete(dyn_arr, x)
    assert prev_len - 1 == len(F.lflatten(dyn_arr))
Code Example #2
def test_delete_elements_but_property_always_hold(xs):
    dyn_arr = dynamic_array(xs)
    for x in xs:
        prev_len = len(F.lflatten(dyn_arr))
        delete(dyn_arr, x)
        assert prev_len - 1 == len(F.lflatten(dyn_arr))
        # dynamic array properties
        for arr in dyn_arr:
            assert is_sorted(arr)
            assert is_power_of_two(len(arr))
        assert len(dyn_arr) == len(F.ldistinct(dyn_arr, key=len))
Code Example #3
def test_delete_elem_not_in_arr_then_nothing_happen(xs_z):
    xs, z = xs_z
    dyn_arr = dynamic_array(xs)

    prev_len = len(F.lflatten(dyn_arr))
    delete(dyn_arr, z)
    assert prev_len == len(F.lflatten(dyn_arr))
    # dynamic array properties
    for arr in dyn_arr:
        assert is_sorted(arr)
        assert is_power_of_two(len(arr))
    assert len(dyn_arr) == len(F.ldistinct(dyn_arr, key=len))
Code Example #4
def extract_tokens_and_features(text,
                                nlp_pipeline,
                                word_processors=None,
                                sentence_processors=None):

    if not isinstance(text, str) and isinstance(text, Iterable):
        text = fp.first(text)

    if not word_processors:
        word_processors = []

    if not sentence_processors:
        sentence_processors = []

    tokens = []
    word_features = defaultdict(list)
    sentence_features = {}

    adaptor_fn = get_adaptor_fn(nlp_pipeline)
    document = adaptor_fn(nlp_pipeline(text))

    for token in document:
        tokens.append(process_token_lemma(token))

        for processor_fn in word_processors:
            word_features[processor_fn.__name__].append(processor_fn(token))

    for processor_fn in sentence_processors:
        sentence_features[processor_fn.__name__] = processor_fn(word_features)

    features = fp.lflatten(
        [sentence_features[item.__name__] for item in sentence_processors])
    features = np.concatenate(features, axis=0)

    return tokens, features
Code Example #5
def amix(*streams, **kwargs):
    """Mixes multiple audio streams.

    `streams` is either a list of audio streams, or any number of varargs streams.
    """
    streams = F.lflatten(streams)
    return ffmpeg.filter(streams, 'amix', inputs=len(streams), **kwargs)
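Because the varargs are flattened with F.lflatten before being handed to ffmpeg-python, the two call shapes described in the docstring are interchangeable. A minimal usage sketch, assuming ffmpeg-python input streams with hypothetical file names:

# hypothetical usage; both calls produce the same filter graph
a1 = ffmpeg.input('voice.wav').audio
a2 = ffmpeg.input('music.wav').audio
mixed = amix(a1, a2)      # varargs form
mixed = amix([a1, a2])    # list form, flattened by F.lflatten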
Code Example #6
def xstack(*streams, layout, **kwargs):
    """Adds an xstack layout for a number of video streams.

    `streams` is either a list of video streams, or any number of varargs streams.

    `layout` is a list with one item per video. Items may be either 2-tuples of (w, h)
    or strings of "w_h", using ffmpeg's xstack layout syntax.

    Example:

        # A 2x2 grid
        xstack(v1, v2, v3, v4, layout=["0_0", "w0_0", "0_h0", "w0_h0"])

        # Same thing
        xstack([v1, v2, v3, v4], layout=[(0, 0), ("w0", 0), (0, "h0"), ("w0", "h0")])
    """
    streams = F.lflatten(streams)

    assert len(streams) == len(layout)

    layout_strs = []
    for i, l in enumerate(layout):
        if isinstance(l, (list, tuple)):
            layout_strs.append(f'{l[0]}_{l[1]}')
        else:
            layout_strs.append(str(l))

    return ffmpeg.filter(streams,
                         'xstack',
                         inputs=len(streams),
                         layout='|'.join(layout_strs),
                         **kwargs)
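Note that the layout items are rendered to ffmpeg's xstack syntax before the filter is applied: each 2-tuple becomes 'w_h' and the items are joined with '|', so the 2x2 grid in the docstring ends up as the layout string '0_0|w0_0|0_h0|w0_h0'.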
Code Example #7
    def __init__(self,
                 pretrained_model_class: PreTrainedModel,
                 pretrained_model_name: str,
                 extra_layers: List[int],
                 dropout_layers: List[float] = None,
                 freeze: bool = False):
        """
        @param  pretrained_model_class: an object of a pre trained model class (e.g., BertModel)
        @param  pretrained_model_name: a pretrained model path (e.g., 'neuralmind/bert-base-portuguese-cased')
        @param  freeze (bool): whether the model should be fine tuned (True) or not (False).
        """
        super(TransformerClassifier, self).__init__()
        # Instantiate  model
        self.model = pretrained_model_class.from_pretrained(
            pretrained_model_name)

        dropout_layers = dropout_layers or [0. for _ in extra_layers]
        assert len(extra_layers) == len(
            dropout_layers
        ), 'Extra Layers and Dropout Layers should have the same length'

        # Adds the size of the output layer
        all_layers = [self.model.config.hidden_size] + extra_layers + [3]
        dropout_layers = [0.] + dropout_layers + [0.]
        # Instantiate layers based on the sizes received
        layers_instances = fp.lflatten(
            [[nn.Linear(prev, layer), nn.ReLU()] +
             ([nn.Dropout(dropout_layers[i])] if dropout_layers[i] > 0 else [])
             for i, (layer, prev) in enumerate(fp.with_prev(all_layers))
             if prev])
        layers_instances = layers_instances[:-1]  # Remove the last ReLU added.
        self.classifier = nn.Sequential(*layers_instances)

        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False
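A hedged instantiation sketch, assuming transformers' BertModel as the pre-trained class; the layer sizes and dropout values below are illustrative, not taken from the original project:

# hypothetical usage; extra_layers/dropout_layers values are illustrative
from transformers import BertModel

model = TransformerClassifier(
    pretrained_model_class=BertModel,
    pretrained_model_name='neuralmind/bert-base-portuguese-cased',
    extra_layers=[256, 64],        # hidden layers inserted before the 3-way output layer
    dropout_layers=[0.1, 0.1],     # one dropout probability per extra layer
    freeze=True)                   # keep the transformer weights fixed during training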
Code Example #8
    def make_causal_input(self, lod, map_, silent=True):
        dd = defaultdict(list)
        dd_ = []
        rx = re.compile(r"(\b[-']\b)|[\W_]")
        rxlist = [r'("\\)', r'(\\")']
        rx = re.compile('|'.join(rxlist))
        for i in range(len(lod)):
            line_ = lod[i]['sentence']
            line = re.sub(rx, '', line_)
            line = ' '.join(
                word.strip(string.punctuation) for word in line.split())
            caus = lod[i]['cause']
            caus = re.sub(rx, '', caus)
            caus = ' '.join(
                word.strip(string.punctuation) for word in caus.split())
            effe = lod[i]['effect']
            effe = re.sub(rx, '', effe)
            effe = ' '.join(
                word.strip(string.punctuation) for word in effe.split())

            d = defaultdict(list)
            index = 0
            for idx, w in enumerate(word_tokenize(line)):
                index = line.find(w, index)

                if not index == -1:
                    d[idx].append([w, index])
                    # print(w, index)
                    index += len(w)

            d_ = defaultdict(list)
            for idx in d:
                d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

            init_e = line.find(effe)
            init_e = 0 if init_e == -1 else init_e
            init_c = line.find(caus)
            init_c = 0 if init_c == -1 else init_c

            for c, cl in enumerate(word_tokenize(caus)):
                # print('init_c', init_c)
                init_c = line.find(cl, init_c)
                # print('start Cause', init_c)
                stop = line.find(cl, init_c) + len(cl)
                word = line[init_c:stop]
                # print('word', word.upper(), 'el', cl.upper())

                for idx in d_:
                    if int(init_c) == int(d_[idx][0][1]):
                        und_ = defaultdict(list)
                        und_[idx].append(
                            [tuple([word, 'C']),
                             line.find(word, init_c)])
                        d_[idx] = und_[idx]

                init_c += len(cl)
                # print('increment_c', init_c)

            for e, el in enumerate(word_tokenize(effe)):
                # print('init_e', init_e)
                init_e = line.find(el, init_e)
                # print('start Effect', init_e)
                stop = line.find(el, init_e) + len(el)
                word = line[init_e:stop]
                # print('word', word.upper(), 'el', el.upper())

                for idx in d_:
                    if int(init_e) == int(d_[idx][0][1]):
                        und_ = defaultdict(list)
                        und_[idx].append(
                            [tuple([word, 'E']),
                             line.find(word, init_e)])
                        d_[idx] = und_[idx]

                init_e += len(word)
                # print('init_e', init_e)

            dd[i].append(d_)

        for dict_ in dd:
            dd_.append([
                item[0][0] for sub in [[j for j in i.values()]
                                       for i in lflatten(dd[dict_])]
                for item in sub
            ])

        return dd_
Code Example #9
File: funcy.py Project: kfaheem/backpack
def flatten_list():

    some_list = [1, [2, [3, 4], 5], 8, 9]

    print(lflatten(some_list))
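For reference, funcy's lflatten flattens nested lists completely, so the call above prints [1, 2, 3, 4, 5, 8, 9].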
Code Example #10
File: utils.py Project: vincent861223/FinCausal
def make_causal_input(lod, map_, silent=True, test=False):
    #TODO replace hardcoded path by map_
    """
    :param lod: list of dictionaries
    :param map_: mapping of tags and values of interest, i.e. [('cause', 'C'), ('effect', 'E')]. The silent tags are by default taggerd as '_'
    :return: dict of list of tuples for each sentence
    """
    if test:
        dd = defaultdict(list)
        dd_ = []
        rx = re.compile(r"(\b[-']\b)|[\W_]")
        rxlist = [r'("\\)', r'(\\")']
        rx = re.compile('|'.join(rxlist))
        for i in range(len(lod)):
            line_ = lod[i]['sentence']
            line = re.sub(rx, '', line_)

            d = defaultdict(list)
            index = 0
            for idx, w in enumerate(word_tokenize(line)):
                index = line.find(w, index)

                if not index == -1:
                    d[idx].append([w, index])
                    #print(w, index)
                    index += len(w)

            d_ = defaultdict(list)
            for idx in d:
                d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

            dd[i].append(d_)

        for dict_ in dd:
            dd_.append([
                item[0][0] for sub in [[j for j in i.values()]
                                       for i in lflatten(dd[dict_])]
                for item in sub
            ])

        return dd_
    else:
        dd = defaultdict(list)
        dd_ = []
        rx = re.compile(r"(\b[-']\b)|[\W_]")
        rxlist = [r'("\\)', r'(\\")']
        rx = re.compile('|'.join(rxlist))
        for i in range(len(lod)):
            line_ = lod[i]['sentence']
            line = re.sub(rx, '', line_)
            caus = lod[i]['cause']
            caus = re.sub(rx, '', caus)
            effe = lod[i]['effect']
            effe = re.sub(rx, '', effe)

            d = defaultdict(list)
            index = 0
            for idx, w in enumerate(word_tokenize(line)):
                index = line.find(w, index)

                if not index == -1:
                    d[idx].append([w, index])
                    #print(w, index)
                    index += len(w)

            d_ = defaultdict(list)
            for idx in d:
                d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

            init_e = line.find(effe)
            init_e = 0 if init_e == -1 else init_e
            init_c = line.find(caus)
            init_c = 0 if init_c == -1 else init_c

            for c, cl in enumerate(word_tokenize(caus)):
                #print('init_c', init_c)
                init_c = line.find(cl, init_c)
                #print('start Cause', init_c)
                stop = line.find(cl, init_c) + len(cl)
                word = line[init_c:stop]
                #print('word', word.upper(), 'el', cl.upper())

                for idx in d_:
                    if int(init_c) == int(d_[idx][0][1]):
                        und_ = defaultdict(list)
                        und_[idx].append(
                            [tuple([word, 'C']),
                             line.find(word, init_c)])
                        d_[idx] = und_[idx]

                init_c += len(cl)
                #print('increment_c', init_c)

            for e, el in enumerate(word_tokenize(effe)):
                #print('init_e', init_e)
                init_e = line.find(el, init_e)
                #print('start Effect', init_e)
                stop = line.find(el, init_e) + len(el)
                word = line[init_e:stop]
                #print('word', word.upper(), 'el', el.upper())

                for idx in d_:
                    if int(init_e) == int(d_[idx][0][1]):
                        und_ = defaultdict(list)
                        und_[idx].append(
                            [tuple([word, 'E']),
                             line.find(word, init_e)])
                        d_[idx] = und_[idx]

                init_e += len(word)
                #print('init_e', init_e)

            dd[i].append(d_)

        for dict_ in dd:
            dd_.append([
                item[0][0] for sub in [[j for j in i.values()]
                                       for i in lflatten(dd[dict_])]
                for item in sub
            ])

        return dd_
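To make the expected input shape concrete, here is a minimal hedged example (the sentence and spans are invented for illustration): each record carries a raw sentence plus its cause and effect spans, and the function returns one list of (word, tag) tuples per record.

# hypothetical input; 'sentence', 'cause' and 'effect' are the keys the function reads
lod = [{
    'sentence': 'Profits fell because costs rose.',
    'cause': 'costs rose',
    'effect': 'Profits fell',
}]
tagged = make_causal_input(lod, map_=[('cause', 'C'), ('effect', 'E')])
# tagged[0] is a list of (word, tag) tuples, e.g. ('Profits', 'E'), ('because', '_'), ('costs', 'C'), ...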
Code Example #11
def make_causal_input(lod, map_, silent=True, bio=False):
    """
    :param lod: list of dictionaries
    :param map_: mapping of tags and values of interest, i.e. [('cause', 'C'), ('effect', 'E')]. The silent tags are by default taggerd as '_'
    :return: dict of list of tuples for each sentence
    """

    dd = defaultdict(list)
    dd_ = []
    rx = re.compile(r"(\b[-']\b)|[\W_]")
    rxlist = [r'("\\)', r'(\\")']
    rx = re.compile('|'.join(rxlist))
    for i in range(len(lod)):
        line_ = lod[i]['sentence']
        line = re.sub(rx, '', line_)
        ante = lod[i]['cause']
        ante = re.sub(rx, '', ante)
        cons = lod[i]['effect']
        cons = re.sub(rx, '', cons)

        silent or print(line)
        d = defaultdict(list)
        index = 0
        for idx, w in enumerate(word_tokenize(line)):
            index = line.find(w, index)

            if not index == -1:
                d[idx].append([w, index])
                silent or print(w, index)

                index += len(w)

        d_ = defaultdict(list)
        for idx in d:

            d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

            def cut_space(init_t):
                for s_idx, s in enumerate(line[init_t:]):
                    if s != ' ':
                        init_t += s_idx
                        return init_t

            init_a = cut_space(line.find(ante))
            init_c = cut_space(line.find(cons))

            ante_list = word_tokenize(ante)
            for (el_idx, el) in enumerate(ante_list):
                start = line.find(el, init_a)
                # print('start A')
                # print(start)
                # print(int(d_[idx][0][1]))
                stop = line.find(el, init_a) + len(el)
                word = line[start:stop]
                # print(word)
                if int(start) == int(d_[idx][0][1]):
                    und_ = defaultdict(list)
                    if bio and el_idx == 0:
                        und_[idx].append(
                            [tuple([word, 'B-C']),
                             line.find(word, init_a)])
                    elif bio:
                        und_[idx].append(
                            [tuple([word, 'I-C']),
                             line.find(word, init_a)])
                    else:
                        und_[idx].append(
                            [tuple([word, 'C']),
                             line.find(word, init_a)])
                    d_[idx] = und_[idx]
                    break
                # init_a += len(word) # wrong
                init_a = cut_space(init_a + len(word))

            cons_list = word_tokenize(cons)
            for (el_idx, el) in enumerate(cons_list):
                start = line.find(el, init_c)
                # print('start C')
                # print(start)
                # print(int(d_[idx][0][1]))
                stop = line.find(el, init_c) + len(el)
                word = line[start:stop]
                # print(word)
                if int(start) == int(d_[idx][0][1]):
                    und_ = defaultdict(list)
                    if bio and el_idx == 0:
                        und_[idx].append(
                            [tuple([word, 'B-E']),
                             line.find(word, init_c)])
                    elif bio:
                        und_[idx].append(
                            [tuple([word, 'I-E']),
                             line.find(word, init_c)])
                    else:
                        und_[idx].append(
                            [tuple([word, 'E']),
                             line.find(word, init_c)])
                    d_[idx] = und_[idx]
                    break
                # init_c += len(word) # wrong
                init_c = cut_space(init_c + len(word))

        dd[i].append(d_)

    for dict_ in dd:
        dd_.append([
            item[0][0]
            for sub in [[j for j in i.values()] for i in lflatten(dd[dict_])]
            for item in sub
        ])

    return dd_
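This variant adds a bio flag: with bio=True the first token of a cause or effect span is tagged 'B-C' or 'B-E' and the following tokens 'I-C' or 'I-E', following the usual BIO scheme. A hedged call, reusing a record shaped like the one sketched after Code Example #10:

# hypothetical call; with bio=True tags are 'B-C'/'I-C' and 'B-E'/'I-E' instead of plain 'C'/'E'
tagged_bio = make_causal_input(lod, map_=[('cause', 'C'), ('effect', 'E')], bio=True)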
Code Example #12
def test_added_elements_are_all_saved_to_dyn_arr(xs):
    dyn_arr = dynamic_array()
    for num_inserted, x in enumerate(xs, start=1):
        insert(dyn_arr, x)
        assert len(F.lflatten(dyn_arr)) == num_inserted
    assert set(F.flatten(dyn_arr)) == set(xs)