def batchcounts(self, queries, subset=None, start=None, end=None):
    """Run a batch of regex queries against each file and yield the results.

    :param queries: iterable of query strings. When ``self.macros`` is not
        None, each query is first expanded with
        ``query.format(**self.macros)`` before being parsed.
    :param subset: file names to search; ``self.files`` is used when this
        is None or empty.
    :param start, end: forwarded unchanged to the batch worker.
    :yields: ``(filename, result)`` tuples, one per file, where ``result``
        is an ``array.array('I')`` containing the concatenated output of
        every pattern chunk for that file.
    """
    if self.macros is None:
        patterns = [_regex_parse_query(query, self.flags)
                for query in queries]
    else:
        patterns = [_regex_parse_query(query.format(
                **self.macros), self.flags) for query in queries]
    # Aim for about ``numproc * 4`` chunks, with at least one pattern each.
    chunksize = max(int(len(patterns) / (self.numproc * 4)), 1)
    chunkedpatterns = [patterns[n:n + chunksize]
            for n in range(0, len(patterns), chunksize)]
    # The file names are iterated exactly once. An earlier version first
    # built a throwaway OrderedDict from the same iterable, which was never
    # read and would exhaust ``subset`` if it was a one-shot iterator.
    for filename in subset or self.files:
        result = array.array('I')
        for tmp in self._map(_regex_run_batch, chunkedpatterns,
                filename=filename, fileno=self.fileno[filename],
                lineidxpath=self.lineidxpath, start=start, end=end):
            result.extend(tmp)
        yield filename, result
def batchsents(self, queries, subset=None, start=None, end=None,
        maxresults=100, brackets=False):
    """Variant of sents() to run a batch of queries.

    :param queries: iterable of query strings. When ``self.macros`` is not
        None, each query is first expanded with
        ``query.format(**self.macros)`` before being parsed.
    :param subset: file names to search; ``self.files`` is used when this
        is None or empty.
    :param start, end, maxresults: forwarded unchanged to the batch worker.
    :param brackets: must be false; a true value is rejected.
    :yields: ``(filename, result)`` tuples, one per file, where ``result``
        is a list with the concatenated output of every pattern chunk for
        that file.
    :raises ValueError: if ``brackets`` is true. Because this is a
        generator, the error surfaces when iteration starts.
    """
    if brackets:
        raise ValueError('not applicable with plain text corpus.')
    if self.macros is None:
        patterns = [_regex_parse_query(query, self.flags)
                for query in queries]
    else:
        patterns = [_regex_parse_query(query.format(
                **self.macros), self.flags) for query in queries]
    # Aim for about ``numproc * 4`` chunks, with at least one pattern each.
    chunksize = max(int(len(patterns) / (self.numproc * 4)), 1)
    chunkedpatterns = [patterns[n:n + chunksize]
            for n in range(0, len(patterns), chunksize)]
    # The file names are iterated exactly once. An earlier version first
    # built a throwaway OrderedDict from the same iterable, which was never
    # read and would exhaust ``subset`` if it was a one-shot iterator.
    for filename in subset or self.files:
        result = []
        for tmp in self._map(_regex_run_batch, chunkedpatterns,
                filename=filename, fileno=self.fileno[filename],
                lineidxpath=self.lineidxpath, start=start, end=end,
                maxresults=maxresults, sents=True):
            result.extend(tmp)
        yield filename, result
def render_json_for_related_rrdata(
        self, for_list=False, include_dnsdata=True, as_dict=False,
        user=None):
    """Describe the resource records attached to this domain in a form
    that can be serialised to JSON.

    Address records derived from the static IP mapping are merged into
    the per-hostname record sets, so the caller receives one uniform
    collection of rows.

    :param for_list: accepted but not read by this method.
    :param include_dnsdata: when exactly True, start from the DNSData
        hostname mapping; otherwise start from an empty mapping.
    :param as_dict: when exactly True, return an OrderedDict keyed by
        hostname; otherwise return a flat list of rows.
    :param user: optional user; rows owned by a different user are left
        out unless the user is a superuser.
    :return: data
    """
    from maasserver.models import DNSData, StaticIPAddress

    if include_dnsdata is True:
        records_by_host = DNSData.objects.get_hostname_dnsdata_mapping(
            self, raw_ttl=True)
    else:
        # Imported locally to avoid circular imports.
        from maasserver.models.dnsdata import HostnameRRsetMapping
        records_by_host = defaultdict(HostnameRRsetMapping)

    # Fold the IP addresses into the rrset mapping so the consumer only
    # has to deal with a single structure.
    addresses_by_fqdn = StaticIPAddress.objects.get_hostname_ip_mapping(
        self, raw_ttl=True)
    for fqdn, ip_info in addresses_by_fqdn.items():
        if (user is None or user.is_superuser
                or ip_info.user_id is None
                or ip_info.user_id == user.id):
            # Strip the trailing ".<domain name>" from the hostname.
            label = fqdn[:-(len(self.name) + 1)]
            target = records_by_host[label]
            target.dnsresource_id = ip_info.dnsresource_id
            if ip_info.system_id is not None:
                target.system_id = ip_info.system_id
                target.node_type = ip_info.node_type
            if ip_info.user_id is not None:
                target.user_id = ip_info.user_id
            for address in ip_info.ips:
                if IPAddress(address).version == 6:
                    kind = "AAAA"
                else:
                    kind = "A"
                target.rrset.add((ip_info.ttl, kind, address, None))

    result = OrderedDict() if as_dict is True else []
    for label, rr_info in records_by_host.items():
        rows = []
        for ttl, rrtype, rrdata, dnsdata_id in rr_info.rrset:
            if (rr_info.user_id is None or user is None
                    or user.is_superuser
                    or rr_info.user_id == user.id):
                rows.append(dict(
                    name=label,
                    system_id=rr_info.system_id,
                    node_type=rr_info.node_type,
                    user_id=rr_info.user_id,
                    dnsresource_id=rr_info.dnsresource_id,
                    ttl=ttl,
                    rrtype=rrtype,
                    rrdata=rrdata,
                    dnsdata_id=dnsdata_id,
                ))
        if as_dict is True:
            result.setdefault(label, []).extend(rows)
        else:
            result.extend(rows)
    return result
def _get_fields(self, forward=True, reverse=True, include_parents=True, include_hidden=False,
                export_ordered_set=False):
    """Collect the fields of this model options object, with caching.

    :param forward: include ``local_fields`` and ``local_many_to_many``
        (plus the parents' forward fields when ``include_parents``).
    :param reverse: include the ``rel`` objects of the fields found in
        ``_relation_tree``; skipped when ``self.abstract`` is true.
    :param include_parents: also recurse into every entry of
        ``self.parents``.
    :param include_hidden: also include relations whose ``hidden``
        attribute is true.
    :param export_ordered_set: when true, return the internal OrderedDict
        (field -> True); otherwise return the result of
        ``make_immutable_fields_list`` over its keys, extended with
        ``virtual_fields`` when ``forward`` is true.
    :return: the value cached under the argument tuple.
    """
    # This helper function is used to allow recursion in ``get_fields()``
    # implementation and to provide a fast way for Django's internals to
    # access specific subsets of fields.

    # Creates a cache key composed of all arguments
    cache_key = (forward, reverse, include_parents, include_hidden, export_ordered_set)
    try:
        # NOTE: the cached object itself is returned here, not a copy.
        # Presumably this is safe because the non-ordered-set result is
        # wrapped by make_immutable_fields_list() -- confirm.
        return self._get_fields_cache[cache_key]
    except KeyError:
        pass

    # Using an OrderedDict preserves the order of insertion. This is
    # important when displaying a ModelForm or the contrib.admin panel
    # and no specific ordering is provided.
    fields = OrderedDict()
    # Recursive calls always ask for the OrderedDict form
    # (export_ordered_set=True) so their results can be iterated as
    # mappings and merged with fields.update() below.
    options = {
        'include_parents': include_parents,
        'include_hidden': include_hidden,
        'export_ordered_set': True,
    }

    # Abstract models cannot hold reverse fields.
    if reverse and not self.abstract:
        if include_parents:
            parent_list = self.get_parent_list()
            # Recursively call _get_fields() on each parent, with the same
            # options provided in this call.
            for parent in self.parents:
                for obj, _ in six.iteritems(parent._meta._get_fields(forward=False, **options)):
                    if obj.many_to_many:
                        # A reverse many-to-many relation is skipped only
                        # when its field's creation counter is negative
                        # and its related model is not in the parent list.
                        if not (obj.field.creation_counter < 0 and obj.related_model not in parent_list):
                            fields[obj] = True

                    # Other relations are skipped when the field has a
                    # negative creation counter or is a parent link, and
                    # the related model is not in the parent list.
                    elif not ((obj.field.creation_counter < 0 or obj.field.rel.parent_link) and
                              obj.related_model not in parent_list):
                        fields[obj] = True

        # Tree is computed once and cached until the app cache is expired.
        # It is composed of a list of fields pointing to the current model
        # from other models. If the model is a proxy model, then we also
        # add the concrete model.
        all_fields = (
            self._relation_tree if not self.proxy else
            chain(self._relation_tree, self.concrete_model._meta._relation_tree)
        )

        # Pull out all related objects from forward fields
        for field in (f.rel for f in all_fields):
            # If hidden fields should be included or the relation is not
            # intentionally hidden, add to the fields dict.
            if include_hidden or not field.hidden:
                fields[field] = True
    if forward:
        if include_parents:
            for parent in self.parents:
                # Add the forward fields of each parent.
                fields.update(parent._meta._get_fields(reverse=False, **options))
        # Then add this model's own forward fields.
        fields.update(
            (field, True,)
            for field in chain(self.local_fields, self.local_many_to_many)
        )

    if not export_ordered_set:
        # ``fields`` maps each field instance to True. Callers of the
        # non-ordered-set form only want the field instances, so just
        # the keys are kept.
        fields = list(fields.keys())

        # Virtual fields are not inheritable, therefore they are inserted
        # only when the recursive _get_fields() call comes to an end.
        if forward:
            fields.extend(self.virtual_fields)
        fields = make_immutable_fields_list("get_fields()", fields)

    # Store result into cache for later access
    self._get_fields_cache[cache_key] = fields

    # NOTE: the same object that was just cached is returned, not a copy.
    return fields
# NOTE(review): ``d`` at this point is a mapping created earlier in the
# file, outside this excerpt. Whether ``d['c']`` raises KeyError or yields
# a default depends on its type -- confirm against the preceding code.
d['b'] = 2
d['c']

# deque
# Walk-through of collections.deque. The bare ``d`` / ``d_max`` expression
# statements only display the current contents when run interactively.
d = deque([1])
d
d.append(2)  # add on the right -> 1, 2
d.appendleft(0)  # add on the left -> 0, 1, 2
d
d.pop()  # remove and return the rightmost item (2)
d.popleft()  # remove and return the leftmost item (0)
d
d.extend([2, 3])  # append several items on the right -> 1, 2, 3
d.extendleft([0, -1])  # extend [-1, 0] to the left
d
d.rotate(1)  # rotate one step to the right
d
d.rotate(-1)  # rotate one step back to the left
d
# A bounded deque: once it holds ``maxlen`` items, adding on one end
# discards an item from the opposite end.
d_max = deque("hello", maxlen=5)
d_max.appendleft("A")  # the rightmost 'o' is discarded
d_max
d_max.extendleft(["B", "C"])  # items are added one at a time: C, B, A, h, e
d_max

# 8.3 collections - High-performance container datatypes
# Walk-through of the collections.deque API. The bare expression
# statements only show a value when run interactively.
d = deque('ghi')  # make a new deque with three items
for elem in d:  # iterate over the deque's elements
    print(elem.upper())
d.append('j')  # add a new entry to the right side
d.appendleft('f')  # add a new entry to the left side
d.pop()  # return and remove the rightmost item
d.popleft()  # return and remove the leftmost item
list(d)  # list the contents of the deque
d[0]  # peek at leftmost item
d[-1]  # peek at rightmost item
list(reversed(d))  # list the contents of a deque in reverse
'h' in d  # search the deque
d.extend('jkl')  # add multiple elements at once
d.rotate(1)  # right rotation
d.rotate(-1)  # left rotation
deque(reversed(d))  # make a new deque in reverse order
d.clear()  # empty the deque
# d.pop() is left disabled: popping from an empty deque raises IndexError
d.extendleft('abc')  # extendleft() reverses the input order

# Section 47.6: collections.ChainMap
print("---------Section 47.6: collections.ChainMap----------")
# define two dictionaries with at least some keys overlapping.
dict1 = {'apple': 1, 'banana': 2}
dict2 = {'coconut': 1, 'date': 1, 'apple': 3}
# create two ChainMaps with different ordering of those dicts.
# A ChainMap searches its mappings in order, so for the shared key 'apple'
# combined_dict sees dict1's value and reverse_ordered_dict sees dict2's.
combined_dict = collections.ChainMap(dict1, dict2)
reverse_ordered_dict = collections.ChainMap(dict2, dict1)
# Stack the ``cvmgt`` rows under CGM. ``DataFrame.append`` was removed in
# pandas 2.0; ``pd.concat`` gives the same result for DataFrame inputs.
# NOTE(review): assumes ``cvmgt`` (defined outside this excerpt) is a
# DataFrame -- confirm.
CGM = pd.concat([CGM, cvmgt])
CGM = CGM.drop('AREA', axis=1)
# Keep only the twelve month columns, in this order (10, 11, 12, 1 ... 9).
cols = ['10', '11', '12', '1', '2', '3', '4', '5', '6', '7', '8', '9']
CGM = CGM[cols]
CGM['REMI'] = CGM.index
#grouped = CGM.groupby('REMI', 'AREA'])
###################### Adding entering REMIs ##################################
# REMI values that appear in ``resdf`` but not yet in CGM's index.
entranti = list(
    set(resdf.REMI.values.tolist()).difference(set(CGM.index.values.tolist())))
DFE = OrderedDict()
for ent in entranti:
    dfe = resdf.loc[resdf.REMI == ent]
    captot = dfe['DA CONFERIRE'].sum()
    # One row per entering REMI: its total repeated for each of the twelve
    # month columns, followed by the REMI value itself.
    res = np.repeat(captot, 12).tolist()
    res.append(ent)
    DFE[ent] = res
if DFE:
    DFE = pd.DataFrame.from_dict(DFE, orient='index')
    DFE.columns = CGM.columns
    CGM = pd.concat([CGM, DFE])
else:
    # No entering REMI: assigning CGM's 13 column labels to a frame with no
    # columns would raise a length-mismatch ValueError, so build an empty
    # frame with the right columns and leave CGM untouched.
    DFE = pd.DataFrame(columns=CGM.columns)
# Sum the rows per REMI. The string 'sum' selects pandas' own groupby sum;
# passing the builtin ``sum`` is deprecated in recent pandas.
CGM = CGM.groupby('REMI').agg('sum')
###### up to here: 7 #########
###### from here to the end #########
###############################################################################
def _normalize_item_sentence(sent):
    """Return ``sent`` tokenised the way the reference sentences are stored."""
    for old, new in (('"', ""), (",", " "), ("/", " / "), (".-", " .- "),
                     (".", " . "), ("'", " ' "), ("\n", "")):
        sent = sent.replace(old, new)
    return " ".join(sent.lower().split())


def _normalize_annotation(item, strip_trailing_space=False):
    """Return an annotated ``item`` cleaned so it can be matched to a sentence.

    The replacement order is kept from the original code. The
    ``("  ", " ")`` step collapses the double spaces produced by the
    earlier replacements; the original was written as ``(" ", " ")``, a
    no-op, although the reference sentences are whitespace-collapsed.
    """
    cleaned = " ".join(item.split())
    for old, new in (('"', ""), (",", " "), ("/", " / "), (".-", " .- "),
                     (".", " . "), ("  ", " "), ("'", " ' ")):
        cleaned = cleaned.replace(old, new)
    cleaned = cleaned.lower()
    # endswith() is safe on an empty string, unlike ``cleaned[-1]``.
    if strip_trailing_space and cleaned.endswith(" "):
        cleaned = cleaned[:-1]
    return cleaned


def _config_grid(space):
    """Return every combination of ``(key, value)`` pairs from ``space``."""
    return list(
        product(*[[(key, val) for val in vals] for key, vals in space.items()]))


def items_clustering():
    """Run the clustering experiments on the items corpus.

    Loads the items and the expert annotations, maps each annotation to
    the index of its matching normalized item, runs every configuration
    produced by ``get_hparams()``, then prints the results sorted on their
    second element and pickles them to ``./results/items/results.pck``.
    Annotations with more than two tokens that match no item are printed
    so they can be inspected.
    """
    items = load_all_items()
    annotated_items = get_annotated_data("items")

    sents = [_normalize_item_sentence(sent) for sent in items]
    # First position of each normalized sentence; equivalent to
    # ``sents.index(x)`` without rescanning the list on every lookup.
    first_index = {}
    for position, sent in enumerate(sents):
        first_index.setdefault(sent, position)

    annotated_items_idxs = {}
    for key, key_items in annotated_items.items():
        clusters = []
        for cluster in key_items:
            cluster_idxs = []
            for item in cluster:
                cleaned_item = _normalize_annotation(item)
                position = first_index.get(cleaned_item)
                if position is not None:
                    cluster_idxs.append(position)
                elif len(cleaned_item.split()) > 2:
                    print(cleaned_item + ",")
            clusters.append(cluster_idxs)
        annotated_items_idxs[key] = clusters

    # NOTE: pickle.load can execute arbitrary code; only load annotation
    # files that come from a trusted source.
    with open("expert_annotations/dec3_expert_knowledge.pck", "rb") as f:
        dec3_data = pickle.load(f)

    sentence_annotations = []
    for index, samples in dec3_data.items():
        for item in samples:
            cleaned_item = _normalize_annotation(
                item, strip_trailing_space=True)
            position = first_index.get(cleaned_item)
            if position is not None:
                sentence_annotations.append((position, index))
            elif len(cleaned_item.split()) > 2:
                print(cleaned_item + ",")

    results_dir = "./results/items"
    os.makedirs(results_dir, exist_ok=True)

    hparams, pre_hparams = get_hparams()
    results = []
    all_configs = _config_grid(hparams)
    for pre_config in tqdm(
            _config_grid(pre_hparams),
            desc="Processing configs for items",
    ):
        # Preprocess a fresh copy so every pre-config starts from the raw
        # items.
        sents = deepcopy(items)
        pre_config = dict(pre_config)
        vocab, sents = preprocess(sents, pre_config["word_filtering"],
                                  pre_config["reduce_method"])
        sent_embeddings = sentence_vectorize(pre_config["reduce_method"],
                                             pre_config["model"], sents,
                                             vocab)
        results.extend(
            Parallel(n_jobs=-1)(launch_from_config(
                dict(config),
                pre_config,
                results_dir,
                vocab,
                sents,
                sent_embeddings,
                annotated_items_idxs,
                sentence_annotations,
            ) for config in all_configs))

    results = sorted(results, key=lambda item: item[1])
    for result in results:
        print(result)
    with open(os.path.join(results_dir, "results.pck"), "wb") as f:
        pickle.dump(results, f)