import funcy as F


def test_delete_elem_but_property_always_hold(xs_x):
    xs, x = xs_x
    dyn_arr = dynamic_array(xs)
    prev_len = len(F.lflatten(dyn_arr))

    delete(dyn_arr, x)

    assert prev_len - 1 == len(F.lflatten(dyn_arr))

def test_delete_elements_but_property_always_hold(xs):
    dyn_arr = dynamic_array(xs)
    for x in xs:
        prev_len = len(F.lflatten(dyn_arr))
        delete(dyn_arr, x)
        assert prev_len - 1 == len(F.lflatten(dyn_arr))

        # dynamic array properties
        for arr in dyn_arr:
            assert is_sorted(arr)
            assert is_power_of_two(len(arr))
        assert len(dyn_arr) == len(F.ldistinct(dyn_arr, key=len))

def test_delete_elem_not_in_arr_then_nothing_happen(xs_z):
    xs, z = xs_z
    dyn_arr = dynamic_array(xs)
    prev_len = len(F.lflatten(dyn_arr))

    delete(dyn_arr, z)

    assert prev_len == len(F.lflatten(dyn_arr))

    # dynamic array properties
    for arr in dyn_arr:
        assert is_sorted(arr)
        assert is_power_of_two(len(arr))
    assert len(dyn_arr) == len(F.ldistinct(dyn_arr, key=len))

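# The invariants asserted above pin down the structure under test: a "dynamic
# array" here is a list of sorted blocks whose lengths are distinct powers of
# two. is_sorted and is_power_of_two are not defined in this file; these are
# plausible one-liners, not the tested module's code:

def is_sorted(arr):
    return all(a <= b for a, b in zip(arr, arr[1:]))


def is_power_of_two(n):
    return n > 0 and n & (n - 1) == 0

# For example, [[7], [2, 9], [1, 3, 5, 8]] satisfies all three invariants:
# every block is sorted, and the block sizes 1, 2, 4 are distinct powers of two.
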
from collections import defaultdict
from collections.abc import Iterable

import funcy as fp
import numpy as np


def extract_tokens_and_features(text,
                                nlp_pipeline,
                                word_processors=None,
                                sentence_processors=None):
    # Accept a single string or an iterable of strings (only the first is used).
    if not isinstance(text, str) and isinstance(text, Iterable):
        text = fp.first(text)
    if not word_processors:
        word_processors = []
    if not sentence_processors:
        sentence_processors = []

    tokens = []
    word_features = defaultdict(list)
    sentence_features = {}

    adaptor_fn = get_adaptor_fn(nlp_pipeline)
    document = adaptor_fn(nlp_pipeline(text))

    for token in document:
        tokens.append(process_token_lemma(token))
        for processor_fn in word_processors:
            word_features[processor_fn.__name__].append(processor_fn(token))

    for processor_fn in sentence_processors:
        sentence_features[processor_fn.__name__] = processor_fn(word_features)

    features = fp.lflatten(
        [sentence_features[item.__name__] for item in sentence_processors])
    features = np.concatenate(features, axis=0)

    return tokens, features

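# The processor contract follows from the code above: a word processor maps one
# token to one value, and a sentence processor reduces the accumulated
# word_features dict to an array that np.concatenate can stack. A usage sketch,
# assuming a spaCy-style pipeline whose tokens expose .text and assuming
# get_adaptor_fn yields an iterable of such tokens (token_length and
# mean_token_length are hypothetical processors):

def token_length(token):
    return len(token.text)


def mean_token_length(word_features):
    return np.array([np.mean(word_features['token_length'])])

# tokens, features = extract_tokens_and_features(
#     'A short example sentence.', nlp,
#     word_processors=[token_length],
#     sentence_processors=[mean_token_length])
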
import ffmpeg


def amix(*streams, **kwargs):
    """Mixes multiple audio streams.

    `streams` is either a list of audio streams, or any number of varargs
    streams.
    """
    streams = F.lflatten(streams)
    return ffmpeg.filter(streams, 'amix', inputs=len(streams), **kwargs)

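# Usage sketch with ffmpeg-python (hypothetical file names; `duration` is a
# standard amix option selecting how long the mix runs):
#
# a = ffmpeg.input('voice.mp3').audio
# b = ffmpeg.input('music.mp3').audio
# ffmpeg.output(amix(a, b, duration='longest'), 'mixed.mp3').run()
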
def xstack(*streams, layout, **kwargs):
    """Adds an xstack layout for a number of video streams.

    `streams` is either a list of video streams, or any number of varargs
    streams.

    `layout` is a list with one item per video. Items may be either 2-tuples
    of (w, h) or strings of "w_h", using ffmpeg's xstack layout syntax.

    Example:
        # A 2x2 grid
        xstack(v1, v2, v3, v4, layout=["0_0", "w0_0", "0_h0", "w0_h0"])
        # Same thing
        xstack([v1, v2, v3, v4],
               layout=[(0, 0), ("w0", 0), (0, "h0"), ("w0", "h0")])
    """
    streams = F.lflatten(streams)
    assert len(streams) == len(layout)

    layout_strs = []
    for item in layout:
        if isinstance(item, (list, tuple)):
            layout_strs.append(f'{item[0]}_{item[1]}')
        else:
            layout_strs.append(str(item))

    return ffmpeg.filter(streams,
                         'xstack',
                         inputs=len(streams),
                         layout='|'.join(layout_strs),
                         **kwargs)

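# End-to-end sketch (hypothetical input files; the xstack filter expects the
# clips to have compatible dimensions so the quadrants tile cleanly):
#
# clips = [ffmpeg.input(f'clip{i}.mp4').video for i in range(4)]
# grid = xstack(clips, layout=['0_0', 'w0_0', '0_h0', 'w0_h0'])
# ffmpeg.output(grid, 'grid.mp4').run()
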
from typing import List

import torch.nn as nn
from transformers import PreTrainedModel


class TransformerClassifier(nn.Module):

    def __init__(self,
                 pretrained_model_class: PreTrainedModel,
                 pretrained_model_name: str,
                 extra_layers: List[int],
                 dropout_layers: List[float] = None,
                 freeze: bool = False):
        """
        @param pretrained_model_class: a pretrained model class (e.g., BertModel)
        @param pretrained_model_name: a pretrained model path (e.g.,
            'neuralmind/bert-base-portuguese-cased')
        @param extra_layers: hidden sizes of the classifier layers appended
            after the transformer.
        @param dropout_layers: dropout probability after each extra layer
            (defaults to no dropout).
        @param freeze (bool): whether the pretrained weights should be frozen
            (True) or fine-tuned (False).
        """
        super(TransformerClassifier, self).__init__()

        # Instantiate model
        self.model = pretrained_model_class.from_pretrained(
            pretrained_model_name)

        dropout_layers = dropout_layers or [0. for _ in extra_layers]
        assert len(extra_layers) == len(
            dropout_layers
        ), 'Extra Layers and Dropout Layers should have the same length'

        # Adds the size of the output layer (3 classes)
        all_layers = [self.model.config.hidden_size] + extra_layers + [3]
        dropout_layers = [0.] + dropout_layers + [0.]

        # Instantiate layers based on the sizes received
        layers_instances = fp.lflatten(
            [[nn.Linear(prev, layer), nn.ReLU()] +
             ([nn.Dropout(dropout_layers[i])] if dropout_layers[i] > 0 else [])
             for i, (layer, prev) in enumerate(fp.with_prev(all_layers))
             if prev])
        layers_instances = layers_instances[:-1]  # Remove the last ReLU added.

        self.classifier = nn.Sequential(*layers_instances)

        if freeze:
            for param in self.model.parameters():
                param.requires_grad = False

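# Construction sketch (hypothetical sizes; the model name comes from the
# docstring). With extra_layers=[256] and dropout_layers=[0.3], the head built
# above becomes Linear(hidden, 256) -> ReLU -> Dropout(0.3) -> Linear(256, 3):
#
# from transformers import BertModel
# clf = TransformerClassifier(BertModel,
#                             'neuralmind/bert-base-portuguese-cased',
#                             extra_layers=[256],
#                             dropout_layers=[0.3],
#                             freeze=True)
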
import re
import string

from funcy import lflatten
from nltk.tokenize import word_tokenize


def make_causal_input(self, lod, map_, silent=True):
    dd = defaultdict(list)
    dd_ = []

    rx = re.compile(r"(\b[-']\b)|[\W_]")
    rxlist = [r'("\\)', r'(\\")']
    rx = re.compile('|'.join(rxlist))

    for i in range(len(lod)):
        line_ = lod[i]['sentence']
        line = re.sub(rx, '', line_)
        line = ' '.join(
            word.strip(string.punctuation) for word in line.split())

        caus = lod[i]['cause']
        caus = re.sub(rx, '', caus)
        caus = ' '.join(
            word.strip(string.punctuation) for word in caus.split())

        effe = lod[i]['effect']
        effe = re.sub(rx, '', effe)
        effe = ' '.join(
            word.strip(string.punctuation) for word in effe.split())

        # Record every token with its character offset in the sentence.
        d = defaultdict(list)
        index = 0
        for idx, w in enumerate(word_tokenize(line)):
            index = line.find(w, index)
            if not index == -1:
                d[idx].append([w, index])
                index += len(w)

        # Default every token to the silent tag '_'.
        d_ = defaultdict(list)
        for idx in d:
            d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

        init_e = line.find(effe)
        init_e = 0 if init_e == -1 else init_e
        init_c = line.find(caus)
        init_c = 0 if init_c == -1 else init_c

        # Re-tag tokens inside the cause span as 'C'.
        for cl in word_tokenize(caus):
            init_c = line.find(cl, init_c)
            stop = line.find(cl, init_c) + len(cl)
            word = line[init_c:stop]
            for idx in d_:
                if int(init_c) == int(d_[idx][0][1]):
                    und_ = defaultdict(list)
                    und_[idx].append(
                        [tuple([word, 'C']), line.find(word, init_c)])
                    d_[idx] = und_[idx]
            init_c += len(cl)

        # Re-tag tokens inside the effect span as 'E'.
        for el in word_tokenize(effe):
            init_e = line.find(el, init_e)
            stop = line.find(el, init_e) + len(el)
            word = line[init_e:stop]
            for idx in d_:
                if int(init_e) == int(d_[idx][0][1]):
                    und_ = defaultdict(list)
                    und_[idx].append(
                        [tuple([word, 'E']), line.find(word, init_e)])
                    d_[idx] = und_[idx]
            init_e += len(word)

        dd[i].append(d_)

    # Flatten each sentence's tagging into a list of (word, tag) tuples.
    for dict_ in dd:
        dd_.append([
            item[0][0] for sub in
            [[j for j in i.values()] for i in lflatten(dd[dict_])]
            for item in sub
        ])

    return dd_

def flatten_list():
    some_list = [1, [2, [3, 4], 5], 8, 9]
    print(lflatten(some_list))  # -> [1, 2, 3, 4, 5, 8, 9]

def make_causal_input(lod, map_, silent=True, test=False):
    """
    :param lod: list of dictionaries
    :param map_: mapping of tags and values of interest, i.e.
        [('cause', 'C'), ('effect', 'E')]. The silent tokens are by default
        tagged as '_'
    :return: list of (word, tag) tuple lists, one per sentence
    """
    # TODO: replace hardcoded path by map_
    if test:
        dd = defaultdict(list)
        dd_ = []

        rx = re.compile(r"(\b[-']\b)|[\W_]")
        rxlist = [r'("\\)', r'(\\")']
        rx = re.compile('|'.join(rxlist))

        for i in range(len(lod)):
            line_ = lod[i]['sentence']
            line = re.sub(rx, '', line_)

            d = defaultdict(list)
            index = 0
            for idx, w in enumerate(word_tokenize(line)):
                index = line.find(w, index)
                if not index == -1:
                    d[idx].append([w, index])
                    index += len(w)

            # In test mode every token keeps the silent tag '_'.
            d_ = defaultdict(list)
            for idx in d:
                d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

            dd[i].append(d_)

        for dict_ in dd:
            dd_.append([
                item[0][0] for sub in
                [[j for j in i.values()] for i in lflatten(dd[dict_])]
                for item in sub
            ])

        return dd_

    else:
        dd = defaultdict(list)
        dd_ = []

        rx = re.compile(r"(\b[-']\b)|[\W_]")
        rxlist = [r'("\\)', r'(\\")']
        rx = re.compile('|'.join(rxlist))

        for i in range(len(lod)):
            line_ = lod[i]['sentence']
            line = re.sub(rx, '', line_)
            caus = lod[i]['cause']
            caus = re.sub(rx, '', caus)
            effe = lod[i]['effect']
            effe = re.sub(rx, '', effe)

            d = defaultdict(list)
            index = 0
            for idx, w in enumerate(word_tokenize(line)):
                index = line.find(w, index)
                if not index == -1:
                    d[idx].append([w, index])
                    index += len(w)

            d_ = defaultdict(list)
            for idx in d:
                d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

            init_e = line.find(effe)
            init_e = 0 if init_e == -1 else init_e
            init_c = line.find(caus)
            init_c = 0 if init_c == -1 else init_c

            for cl in word_tokenize(caus):
                init_c = line.find(cl, init_c)
                stop = line.find(cl, init_c) + len(cl)
                word = line[init_c:stop]
                for idx in d_:
                    if int(init_c) == int(d_[idx][0][1]):
                        und_ = defaultdict(list)
                        und_[idx].append(
                            [tuple([word, 'C']), line.find(word, init_c)])
                        d_[idx] = und_[idx]
                init_c += len(cl)

            for el in word_tokenize(effe):
                init_e = line.find(el, init_e)
                stop = line.find(el, init_e) + len(el)
                word = line[init_e:stop]
                for idx in d_:
                    if int(init_e) == int(d_[idx][0][1]):
                        und_ = defaultdict(list)
                        und_[idx].append(
                            [tuple([word, 'E']), line.find(word, init_e)])
                        d_[idx] = und_[idx]
                init_e += len(word)

            dd[i].append(d_)

        for dict_ in dd:
            dd_.append([
                item[0][0] for sub in
                [[j for j in i.values()] for i in lflatten(dd[dict_])]
                for item in sub
            ])

        return dd_

def make_causal_input(lod, map_, silent=True, bio=False):
    """
    :param lod: list of dictionaries
    :param map_: mapping of tags and values of interest, i.e.
        [('cause', 'C'), ('effect', 'E')]. The silent tokens are by default
        tagged as '_'
    :param bio: emit BIO-style tags (B-C/I-C, B-E/I-E) instead of plain C/E
    :return: list of (word, tag) tuple lists, one per sentence
    """
    dd = defaultdict(list)
    dd_ = []

    rx = re.compile(r"(\b[-']\b)|[\W_]")
    rxlist = [r'("\\)', r'(\\")']
    rx = re.compile('|'.join(rxlist))

    for i in range(len(lod)):
        line_ = lod[i]['sentence']
        line = re.sub(rx, '', line_)
        ante = lod[i]['cause']
        ante = re.sub(rx, '', ante)
        cons = lod[i]['effect']
        cons = re.sub(rx, '', cons)
        silent or print(line)

        d = defaultdict(list)
        index = 0
        for idx, w in enumerate(word_tokenize(line)):
            index = line.find(w, index)
            if not index == -1:
                d[idx].append([w, index])
                silent or print(w, index)
                index += len(w)

        d_ = defaultdict(list)
        for idx in d:
            d_[idx].append([tuple([d[idx][0][0], '_']), d[idx][0][1]])

        def cut_space(init_t):
            # Skip leading spaces from init_t onward.
            for s_idx, s in enumerate(line[init_t:]):
                if s != ' ':
                    return init_t + s_idx
            return init_t

        init_a = cut_space(line.find(ante))
        init_c = cut_space(line.find(cons))

        ante_list = word_tokenize(ante)
        for el_idx, el in enumerate(ante_list):
            start = line.find(el, init_a)
            stop = line.find(el, init_a) + len(el)
            word = line[start:stop]
            # Find the token whose recorded offset matches this span.
            for idx in d_:
                if int(start) == int(d_[idx][0][1]):
                    und_ = defaultdict(list)
                    if bio and el_idx == 0:
                        und_[idx].append(
                            [tuple([word, 'B-C']), line.find(word, init_a)])
                    elif bio:
                        und_[idx].append(
                            [tuple([word, 'I-C']), line.find(word, init_a)])
                    else:
                        und_[idx].append(
                            [tuple([word, 'C']), line.find(word, init_a)])
                    d_[idx] = und_[idx]
                    break
            init_a = cut_space(init_a + len(word))

        cons_list = word_tokenize(cons)
        for el_idx, el in enumerate(cons_list):
            start = line.find(el, init_c)
            stop = line.find(el, init_c) + len(el)
            word = line[start:stop]
            for idx in d_:
                if int(start) == int(d_[idx][0][1]):
                    und_ = defaultdict(list)
                    if bio and el_idx == 0:
                        und_[idx].append(
                            [tuple([word, 'B-E']), line.find(word, init_c)])
                    elif bio:
                        und_[idx].append(
                            [tuple([word, 'I-E']), line.find(word, init_c)])
                    else:
                        und_[idx].append(
                            [tuple([word, 'E']), line.find(word, init_c)])
                    d_[idx] = und_[idx]
                    break
            init_c = cut_space(init_c + len(word))

        dd[i].append(d_)

    for dict_ in dd:
        dd_.append([
            item[0][0] for sub in
            [[j for j in i.values()] for i in lflatten(dd[dict_])]
            for item in sub
        ])

    return dd_

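# Usage sketch for these taggers (hypothetical sentence; map_ as in the
# docstring):
#
# lod = [{'sentence': 'Heavy rain caused flooding in the valley',
#         'cause': 'Heavy rain',
#         'effect': 'flooding in the valley'}]
# make_causal_input(lod, [('cause', 'C'), ('effect', 'E')])
# # -> [[('Heavy', 'C'), ('rain', 'C'), ('caused', '_'),
# #      ('flooding', 'E'), ('in', 'E'), ('the', 'E'), ('valley', 'E')]]
# # With bio=True, the first cause/effect token gets B-C/B-E, the rest I-C/I-E.
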
def test_added_elements_are_all_saved_to_dyn_arr(xs):
    dyn_arr = dynamic_array()
    for num_inserted, x in enumerate(xs, start=1):
        insert(dyn_arr, x)
        assert len(F.lflatten(dyn_arr)) == num_inserted
    assert set(F.flatten(dyn_arr)) == set(xs)

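# A minimal sketch of the structure these tests exercise, assuming the classic
# design: a list of sorted blocks with at most one block per power-of-two size,
# where insert() merges equal-sized blocks the way binary addition carries.
# This is a hypothetical stand-in, not the module under test.
from heapq import merge


def dynamic_array(xs=()):
    arr = []
    for x in xs:
        insert(arr, x)
    return arr


def insert(dyn_arr, x):
    block = [x]
    while True:
        same = next((b for b in dyn_arr if len(b) == len(block)), None)
        if same is None:
            break
        dyn_arr.remove(same)
        block = list(merge(block, same))  # merge keeps each block sorted
    dyn_arr.append(block)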