def testdata(model_path, test_music, phase, save_path, start=0, end=1024):
    """Separate a test track with the given model and save the target and instrument stereo wavs."""
    savefile_name = model_path.name + ".wav"
    # `end` is given as a frame count; convert it to an absolute frame index
    end = start + end

    # target (vocal) estimate, left and right channels
    wav_L = util.separate(test_music[0], model_path, mask=test_music[2], start=start, end=end)
    wav_R = util.separate(test_music[1], model_path, mask=test_music[2], start=start, end=end)
    wav_Stereo = np.array((wav_L, wav_R))
    del wav_L, wav_R
    util.SaveStereoAudio("target-%s" % savefile_name, wav_Stereo,
                         phase[:, :, start:end], save_path=save_path)

    # instrument estimate (multi=1), left and right channels
    wav_L = util.separate(test_music[0], model_path, mask=test_music[2], multi=1, start=start, end=end)
    wav_R = util.separate(test_music[1], model_path, mask=test_music[2], multi=1, start=start, end=end)
    wav_Stereo = np.array((wav_L, wav_R))
    del wav_L, wav_R
    util.SaveStereoAudio("inst-%s" % savefile_name, wav_Stereo,
                         phase[:, :, start:end], save_path=save_path)
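# testdata above assumes util.SaveStereoAudio recombines the separated magnitude
# spectrograms with the original phase and writes a stereo wav. A minimal sketch of
# that reconstruction step, assuming librosa and soundfile are available and that the
# per-channel arrays are (bins, frames); the function name, STFT hop, and sample rate
# below are hypothetical, not the project's actual util implementation.
import numpy as np
import librosa
import soundfile as sf


def save_stereo_audio_sketch(filename, mag_stereo, phase_stereo, sr=44100, hop_length=512):
    """Rebuild complex spectra from magnitude + phase and write a stereo wav."""
    channels = []
    for mag, phase in zip(mag_stereo, phase_stereo):
        spec = mag * np.exp(1j * phase)          # complex spectrogram for one channel
        channels.append(librosa.istft(spec, hop_length=hop_length))
    wav = np.stack(channels, axis=-1)            # shape (samples, 2) as soundfile expects
    sf.write(filename, wav, sr)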
def main(nodelist, job_id):
    """main called when run as script"""
    log.debug(f"main {nodelist} {job_id}")
    nodes = util.to_hostnames(nodelist)
    if job_id is not None:
        _, exclusive = separate(is_exclusive_node, nodes)
        if exclusive:
            hostlist = util.to_hostlist(exclusive)
            log.info(f"epilog suspend {hostlist} job_id={job_id}")
            epilog_suspend_nodes(exclusive, job_id)
    else:
        # suspend is allowed to delete exclusive nodes
        log.info(f"suspend {nodelist}")
        suspend_nodes(nodes)
def bakker_kMeans(data, **kwargs):
    """
    This is an implementation of the k-means algorithm designed specifically
    for flow cytometry data in the following paper:

    T.C.B. Schut, B.G.D. Grooth, and J. Greve, "Cluster analysis of flow
    cytometric list mode data on a personal computer", Cytometry, vol. 14,
    1993, pp. 649-659.

    @type data: array
    @param data: The data to be clustered
    @type kwargs: dict
    @param kwargs: The following args are accepted:
        - numClusters: The number of clusters to form
        - initClusters: The number of initial cluster centers (default 200)

    @rtype: tuple
    @return: A list where each element indicates the cluster membership of the
        corresponding index in the original data, and a message string
    """
    k = 1
    initClusters = 200
    msg = ''

    if 'numClusters' in kwargs.keys():
        k = int(kwargs['numClusters'])
    if 'initClusters' in kwargs.keys():
        initClusters = int(kwargs['initClusters'])

    # Log transform
    logData = tm.getMethod('log')(data)

    # Choose a large number (200 as suggested by the authors) of non-random initial centers
    centers = util.kinit(logData, initClusters)

    # Run k-means
    _, ids = kmeans2(logData, np.array(centers), minit='matrix')

    # Merge clusters with a special comparison metric until the user-specified
    # cluster count is achieved
    clusters = util.separate(logData, ids)
    finalIDs = merge(k, ids, clusters)

    return finalIDs, msg
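# bakker_kMeans above relies on util.separate(logData, ids) to regroup the data rows
# by their k-means cluster label before merging. A minimal sketch of that grouping,
# assuming ids is a 1-D array of integer labels aligned with the rows of data; this is
# an assumption inferred from the call site, not the project's actual util.separate.
import numpy as np


def separate_by_label_sketch(data, ids):
    """Return a list of row subsets of `data`, one per unique cluster id."""
    data = np.asarray(data)
    ids = np.asarray(ids)
    return [data[ids == label] for label in np.unique(ids)]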
def delete_instances(instances):
    """Delete the given instances, batching the per-instance delete requests"""
    if len(instances) == 0:
        return
    invalid, valid = separate(lambda inst: bool(lkp.instance(inst)), instances)
    log.debug("instances do not exist: {}".format(",".join(invalid)))

    if lkp.cfg.enable_reconfigure:
        count = len(instances)
        hostlist = util.to_hostlist(valid)
        log.info("delete {} subscriptions ({})".format(count, hostlist))
        execute_with_futures(subscription_delete, valid)

    requests = {inst: delete_instance_request(inst) for inst in valid}
    done, failed = batch_execute(requests)
    if failed:
        failed_nodes = [f"{n}: {e}" for n, (_, e) in failed.items()]
        node_str = "\n".join(str(el) for el in truncate_iter(failed_nodes, 5))
        log.error(f"some nodes failed to delete: {node_str}")
    wait_for_operations(done.values())
def main(nodelist, job_id, force=False):
    """main called when run as script"""
    log.debug(f"main {nodelist} {job_id}")
    # nodes are split between normal and exclusive
    # exclusive nodes are handled by PrologSlurmctld
    nodes = expand_nodelist(nodelist)

    if force:
        exclusive = normal = nodes
        prelog = "force "
    else:
        normal, exclusive = separate(is_exclusive_node, nodes)
        prelog = ""

    if job_id is None or force:
        if normal:
            hostlist = util.to_hostlist(normal)
            log.info(f"{prelog}resume {hostlist}")
            resume_nodes(normal)
    else:
        if exclusive:
            hostlist = util.to_hostlist(exclusive)
            log.info(f"{prelog}exclusive resume {hostlist} {job_id}")
            prolog_resume_nodes(job_id, exclusive)
def parse_text(self, text, default_speaker, separate_comma=False,
               n_gram=2, separate_sentence=False, parse_speaker=True,
               normalize=True):
    """
    Parse the input text into a suitable data structure

    :param n_gram: concat sentences of this max length in a line
    :param text: source
    :param default_speaker: the default speaker if no speaker is specified
    :param separate_comma: split by comma
    :param separate_sentence: split a sentence if multiple clauses exist
    :param parse_speaker: bool to turn speaker parsing on/off
    :param normalize: convert common punctuation besides comma to comma
    """
    lines = re.split(r'\r\n|\n\r|\r|\n', text)
    line_speaker_dict = {}
    # TODO: allow speakers not in model_list and force them to be replaced later
    if parse_speaker:
        # re.match(r'^.*(?=:)', text)
        for i, line in enumerate(lines):
            if re.search(r':|\|', line):
                # (?: ...) is a non-capturing group of : and |
                speaker, line = re.split(r'\s*(?::|\|)\s*', line, 1)
                # add an entry only if the voice model exists in the folder;
                # unrecognized ones are changed to the default later
                if speaker in self.model_list:
                    line_speaker_dict[i] = speaker
                lines[i] = line

    if normalize:
        lines = [normalize_text(line) for line in lines]

    # separate by spacy sentencizer
    lines = [
        separate(line, n_gram, comma=separate_comma) for line in lines
    ]

    sentence_dicts = []
    for i, line in enumerate(lines):
        for j, sent in enumerate(line):
            if sentence_dicts:
                # leading punctuation belongs to the previous sentence
                if sent[0].is_punct and not any(sent[0].text == punct
                                                for punct in ['“', '‘']):
                    sentence_dicts[-1]['punct'] += sent.text
                    continue
            sentence_dict = {
                'text': sent.text,
                'begin': j == 0,
                'punct': '',
                'speaker': line_speaker_dict.get(i, self.model_list[default_speaker])
            }
            while not sentence_dict['text'][-1].isalpha():
                sentence_dict['punct'] += sentence_dict['text'][-1]
                sentence_dict['text'] = sentence_dict['text'][:-1]
            # Reverse the punctuation order since it is collected from the last character
            sentence_dict['punct'] = sentence_dict['punct'][::-1]
            sentence_dict['text'] += sentence_dict['punct']
            sentence_dicts.append(sentence_dict)

    speaker_dict = {}
    for i, sentence_dict in enumerate(sentence_dicts):
        if sentence_dict['speaker'] not in speaker_dict:
            speaker_dict[sentence_dict['speaker']] = []
        speaker_dict[sentence_dict['speaker']].append(i)

    self.speaker_dict = speaker_dict
    self.sentence_dicts = sentence_dicts
def parse_text(self, text, default_speaker, force_parse=False,
               separate_comma=False, n_gram=2, separate_sentence=False,
               parse_speaker=True, normalize=True):
    """
    Parse the input text into a suitable data structure

    :param force_parse: force all speakers that are not in the model list to be
        replaced by the default speaker
    :param n_gram: concat sentences of this max length in a line
    :param text: source
    :param default_speaker: the default speaker if no speaker is specified
    :param separate_comma: split by comma
    :param separate_sentence: split a sentence if multiple clauses exist
    :param parse_speaker: bool to turn speaker parsing on/off
    :param normalize: convert common punctuation besides comma to comma
    """
    lines = re.split(r'\r\n|\n\r|\r|\n', text)
    line_speaker_dict = {}
    self.speaker_list = []
    self.speaker_map_dict = {}

    if parse_speaker:
        # re.match(r'^.*(?=:)', text)
        for i, line in enumerate(lines):
            if re.search(r':|\|', line):
                # (?: ...) is a non-capturing group of : and |
                speaker, line = re.split(r'\s*(?::|\|)\s*', line, 1)
                # add an entry only if the voice model exists in the folder;
                # unrecognized ones need to be mapped so they can still be synthesized
                if force_parse:
                    if speaker in self.model_list:
                        line_speaker_dict[i] = speaker
                else:
                    if speaker not in self.speaker_list:
                        self.speaker_list.append(speaker)
                    line_speaker_dict[i] = speaker
                lines[i] = line

    for i, speaker in enumerate(self.speaker_list):
        if speaker not in self.model_list:
            self.speaker_map_dict[speaker] = self.model_list[i % len(self.model_list)]

    # separate by spacy sentencizer
    lines = [
        separate(fix_text(line), n_gram, comma=separate_comma) for line in lines
    ]

    self.sentence_dicts = []
    for i, line in enumerate(lines):
        for j, sent in enumerate(line):
            if self.sentence_dicts:
                # might be buggy: strip leading punctuation and append it to
                # the previous sentence
                while sent[0].is_punct and not any(sent[0].text == punct
                                                   for punct in ['“', '‘']):
                    self.sentence_dicts[-1]['punct'] += sent.text[0]
                    sent = sent[1:]
            sentence_dict = {
                'text': sent.text,
                'begin': j == 0,
                'punct': '',
                'speaker': line_speaker_dict.get(i, default_speaker)
            }
            while not sentence_dict['text'][-1].isalpha():
                sentence_dict['punct'] += sentence_dict['text'][-1]
                sentence_dict['text'] = sentence_dict['text'][:-1]
            # Reverse the punctuation order since it is collected from the last character
            sentence_dict['punct'] = sentence_dict['punct'][::-1]
            sentence_dict['text'] += sentence_dict['punct']
            self.sentence_dicts.append(sentence_dict)

    self.update_speaker_dict()
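# Both parse_text variants assume separate(line, n_gram, comma=...) splits a line into
# spacy Span objects, concatenating up to n_gram sentences per chunk. A minimal sketch
# of that idea using spacy's rule-based sentencizer; the chunking policy and the comma
# handling here are assumptions, not the project's actual separate() implementation.
import spacy

_nlp = spacy.blank("en")
_nlp.add_pipe("sentencizer")


def separate_sketch(line, n_gram=2, comma=False):
    """Split `line` into spans of at most n_gram sentences, or clause-level spans if comma=True."""
    doc = _nlp(line)
    sents = list(doc.sents)
    if comma:
        # split each sentence further at comma tokens
        clauses = []
        for sent in sents:
            start = sent.start
            for tok in sent:
                if tok.text == ",":
                    clauses.append(doc[start:tok.i + 1])
                    start = tok.i + 1
            if start < sent.end:
                clauses.append(doc[start:sent.end])
        return clauses
    chunks = []
    for i in range(0, len(sents), n_gram):
        group = sents[i:i + n_gram]
        # merge consecutive sentences back into one Span over the original doc
        chunks.append(doc[group[0].start:group[-1].end])
    return chunks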
def __init__(self, parent, isolate=False):
    hGap = 20
    vGap = 10
    clusterIDs = DataStore.getCurrentDataSet().getCurrentClustering()
    clustering = separate(DataStore.getCurrentDataSet().data, clusterIDs)
    numClusters = len(clustering)
    numColumns = 3
    self.radioList = []
    self.isolate = isolate

    # TODO: still not perfect, larger cluster sizes give an increasing space
    #       at the bottom
    # The magic numbers include the header width, the vgap for it, and the
    # widths and border pads for the two sizers
    dlgWidth = 275
    dlgHeight = ((numClusters + 1) * (vGap + 20)) + 100
    if isolate:
        dlgWidth += 50
        #dlgHeight += 20

    title = 'Clustering Info'
    if isolate:
        title = 'Isolate clusters'

    wx.Dialog.__init__(self, parent, wx.ID_ANY, title, size=(dlgWidth, dlgHeight))
    self.CenterOnParent()

    # create main data display sizer
    # one row for each cluster plus a header row
    # cols: cluster color, % of total
    self.formSizer = None
    if isolate:
        self.formSizer = wx.FlexGridSizer(numClusters + 1, numColumns + 1, hgap=hGap, vgap=vGap)
    else:
        self.formSizer = wx.FlexGridSizer(numClusters, numColumns, hgap=hGap, vgap=vGap)

    # header row
    if isolate:
        self.formSizer.Add(wx.StaticText(self, -1, 'Select', (5, 10)), 1, wx.EXPAND)
    self.formSizer.Add(wx.StaticText(self, -1, 'Cluster', (5, 10)), 1, wx.EXPAND)
    self.formSizer.Add(wx.StaticText(self, -1, '% of Total', (20, 10)), 1, wx.EXPAND)
    self.formSizer.Add(wx.StaticText(self, -1, '# of Events', (20, 10)), 1, wx.EXPAND)

    # data rows
    for i in range(len(clustering)):
        cluster = clustering[i]
        if isolate:
            self.radioList.append(wx.CheckBox(self, wx.ID_ANY))
            self.formSizer.Add(self.radioList[i], 0, wx.EXPAND)
        # cluster color box
        label = wx.StaticText(self, -1, '', (20, 10))
        label.SetBackgroundColour(plot.methods.plotColors[i])
        self.formSizer.Add(label, 1, wx.EXPAND)
        # % of total
        percent = float(len(cluster)) / len(DataStore.getCurrentDataSet().data) * 100
        label = wx.StaticText(self, -1, '%6.2f' % percent + ' %', (30, 10))
        self.formSizer.Add(label, 1, wx.EXPAND | wx.ALIGN_CENTER)
        # number of events
        label = wx.StaticText(self, -1, str(len(cluster)), (30, 10))
        self.formSizer.Add(label, 1, wx.EXPAND | wx.ALIGN_CENTER)

    # create the button row
    self.buttonSizer = None
    if isolate:
        self.buttonSizer = self.CreateButtonSizer(wx.OK | wx.CANCEL)
    else:
        self.buttonSizer = self.CreateButtonSizer(wx.OK)

    # main sizer
    self.sizer = wx.BoxSizer(wx.VERTICAL)
    self.sizer.Add(self.formSizer, 1, wx.EXPAND | wx.LEFT | wx.RIGHT | wx.TOP, 20)
    self.sizer.Add(self.buttonSizer, 0,
                   wx.EXPAND | wx.LEFT | wx.RIGHT | wx.TOP | wx.BOTTOM, 20)
    self.SetSizer(self.sizer)
def resume_nodes(nodelist, placement_groups=None, exclusive=False):
    """resume nodes in nodelist"""

    def ident_key(n):
        # ident here refers to the combination of partition and group
        return "-".join(
            (
                lkp.node_partition_name(n),
                lkp.node_group_name(n),
            )
        )

    # support already expanded list
    nodes = nodelist
    if isinstance(nodes, str):
        nodelist = expand_nodelist(nodelist)
    nodes = sorted(nodelist, key=ident_key)
    if len(nodes) == 0:
        return

    grouped_nodes = {
        ident: chunk
        for ident, nodes in groupby(nodes, ident_key)
        for chunk in chunked(nodes, n=BULK_INSERT_LIMIT)
    }
    log.debug(f"grouped_nodes: {grouped_nodes}")

    # make all bulkInsert requests and execute with batch
    inserts = {
        ident: create_instances_request(nodes, placement_groups, exclusive)
        for ident, nodes in grouped_nodes.items()
    }
    started, failed = batch_execute(inserts)
    if failed:
        failed_reqs = [f"{e}" for _, (_, e) in failed.items()]
        log.error("bulkInsert API failures: {}".format("\n".join(failed_reqs)))
        for ident, (_, exc) in failed.items():
            down_nodes(grouped_nodes[ident], exc._get_reason())

    # wait for all bulkInserts to complete and log any errors
    bulk_operations = [wait_for_operation(op) for op in started.values()]
    for bulk_op in bulk_operations:
        if "error" in bulk_op:
            error = bulk_op["error"]["errors"][0]
            log.error(
                f"bulkInsert operation error: {error['code']} operationName:'{bulk_op['name']}'"
            )

    # Fetch all insert operations from all bulkInserts. Group by error code and log
    successful_inserts, failed_inserts = separate(
        lambda op: "error" in op, get_insert_operations(bulk_operations)
    )

    # Apparently multiple errors are possible... so join with +.
    # grouped_inserts could be made into a dict, but it's not really necessary. Save some memory.
    grouped_inserts = util.groupby_unsorted(
        failed_inserts,
        lambda op: "+".join(err["code"] for err in op["error"]["errors"]),
    )
    for code, failed_ops in grouped_inserts:
        # at least one insert failure
        failed_nodes = [parse_self_link(op["targetLink"]).instance for op in failed_ops]
        hostlist = util.to_hostlist(failed_nodes)
        count = len(failed_nodes)
        log.error(
            f"{count} instances failed to start due to insert operation error: {code} ({hostlist})"
        )
        down_nodes(hostlist, code)
        if log.isEnabledFor(logging.DEBUG):
            msg = "\n".join(
                err["message"] for err in next(failed_ops)["error"]["errors"]
            )
            log.debug(f"{code} message from first node: {msg}")

    # If reconfigure enabled, create subscriptions for successfully started instances
    if lkp.cfg.enable_reconfigure and len(successful_inserts):
        started_nodes = [
            parse_self_link(op["targetLink"]).instance for op in successful_inserts
        ]
        count = len(started_nodes)
        hostlist = util.to_hostlist(started_nodes)
        log.info("create {} subscriptions ({})".format(count, hostlist))
        execute_with_futures(subscription_create, nodes)
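# resume_nodes and delete_instances above both hand per-node work to
# execute_with_futures. A minimal sketch of such a helper using a thread pool,
# assuming the callable takes a single node/instance argument; this is an assumption
# about the helper's contract, not the slurm-gcp util implementation.
from concurrent.futures import ThreadPoolExecutor, as_completed


def execute_with_futures_sketch(func, seq, max_workers=32):
    """Run func(item) concurrently for every item in seq and surface any exception."""
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {pool.submit(func, item): item for item in seq}
        for future in as_completed(futures):
            future.result()  # re-raises if the worker failed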
def partition_mounts(mounts):
    """partition into cluster-external and internal mounts"""

    def internal_mount(mount):
        return mount.server_ip == lkp.control_host

    return separate(internal_mount, mounts)
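# The Slurm helper snippets (main, delete_instances, resume_nodes, partition_mounts)
# all use the same convention for separate(pred, iterable): it returns the pair
# (items where pred is falsy, items where pred is truthy), e.g.
# normal, exclusive = separate(is_exclusive_node, nodes). A minimal sketch of a helper
# with that contract, inferred from the call sites rather than taken from util.py.
def separate_sketch(pred, iterable):
    """Partition iterable into (no, yes) lists according to pred."""
    no, yes = [], []
    for item in iterable:
        (yes if pred(item) else no).append(item)
    return no, yes


# e.g. separate_sketch(str.isupper, ["a", "B", "c"]) -> (["a", "c"], ["B"])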