Example #1
def testdata(model_path, test_music, phase, save_path, start=0, end=1024):
    """Separate one test track with the model at model_path and save the
    "target-<model>.wav" and "inst-<model>.wav" stereo results to save_path."""

    savefile_name = model_path.name + ".wav"
    end = start + end
    wav_L = util.separate(test_music[0],
                          model_path,
                          mask=test_music[2],
                          start=start,
                          end=end)
    wav_R = util.separate(test_music[1],
                          model_path,
                          mask=test_music[2],
                          start=start,
                          end=end)
    wav_Stereo = np.array((wav_L, wav_R))
    del wav_L, wav_R

    util.SaveStereoAudio("target-%s" % savefile_name,
                         wav_Stereo,
                         phase[:, :, start:end],
                         save_path=save_path)

    wav_L = util.separate(test_music[0],
                          model_path,
                          mask=test_music[2],
                          multi=1,
                          start=start,
                          end=end)
    wav_R = util.separate(test_music[1],
                          model_path,
                          mask=test_music[2],
                          multi=1,
                          start=start,
                          end=end)
    wav_Stereo = np.array((wav_L, wav_R))
    del wav_L, wav_R

    util.SaveStereoAudio("inst-%s" % savefile_name,
                         wav_Stereo,
                         phase[:, :, start:end],
                         save_path=save_path)
Example #2
def main(nodelist, job_id):
    """main called when run as script"""
    log.debug(f"main {nodelist} {job_id}")
    nodes = util.to_hostnames(nodelist)
    if job_id is not None:
        _, exclusive = separate(is_exclusive_node, nodes)
        if exclusive:
            hostlist = util.to_hostlist(exclusive)
            log.info(f"epilog suspend {hostlist} job_id={job_id}")
            epilog_suspend_nodes(exclusive, job_id)
    else:
        # suspend is allowed to delete exclusive nodes
        log.info(f"suspend {nodelist}")
        suspend_nodes(nodes)
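
Examples #2, #4, #5, #9 and #10 all rely on a predicate-based separate helper that the snippets themselves never define. The sketch below is only a minimal stand-in inferred from how they unpack its result (non-matching items first, matching items second); the real util.separate may be implemented differently.

from typing import Callable, Iterable, List, Tuple, TypeVar

T = TypeVar("T")


def separate(pred: Callable[[T], bool], coll: Iterable[T]) -> Tuple[List[T], List[T]]:
    """Partition coll into (items where pred is falsy, items where pred is truthy),
    preserving order, e.g. normal, exclusive = separate(is_exclusive_node, nodes)."""
    no, yes = [], []
    for item in coll:
        (yes if pred(item) else no).append(item)
    return no, yes


odd, even = separate(lambda n: n % 2 == 0, range(6))
assert (odd, even) == ([1, 3, 5], [0, 2, 4])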
Example #3
def bakker_kMeans(data, **kwargs):
    """
    This is an implementation of the k-means algorithm designed specifically
    for flow cytometry data in the following paper:
    
    T.C.B. Schut, B.G.D. Grooth, and J. Greve, 
    "Cluster analysis of flow cytometric list mode data on a personal computer", 
    Cytometry,  vol. 14, 1993, pp. 649-659.

    @type data: array
    @param data: The data to be clustered
    @type kwargs: dict
    @param kwargs: The following args are accepted:
        - numClusters: The number of clusters to form
            
    @rtype: tuple
    @return: A list where each element indicates the cluster membership of the 
        corresponding index in the original data and a message string
    """
    k = 1
    initClusters = 200
    msg = ''
    
    if 'numClusters' in kwargs.keys():
        k = int(kwargs['numClusters'])
    if 'initClusters' in kwargs.keys():
        initClusters = int(kwargs['initClusters'])
    
    # Log transform
    logData = tm.getMethod('log')(data)
    
    # Choose large # (200 as suggested by authors) of non-random initial centers
    centers = util.kinit(logData, initClusters)
    
    # Run k-means
    _, ids = kmeans2(logData, np.array(centers), minit='matrix')
    
    # Merge clusters w/special comparison metric until user cluster # achieved
    clusters = util.separate(logData, ids)
    finalIDs = merge(k, ids, clusters)
    
    return finalIDs, msg
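
The heart of this snippet is: log-transform the data, pick a large number of initial centers (200, per the paper), and hand them to scipy's kmeans2 with minit='matrix'. Below is a self-contained toy version of just that step; the random center choice is an assumption standing in for util.kinit (presumably a smarter, k-means++-style initializer), and the cluster-merging step is omitted.

import numpy as np
from scipy.cluster.vq import kmeans2

rng = np.random.default_rng(0)
data = rng.lognormal(size=(1000, 2))      # stand-in for FCS list-mode data
log_data = np.log10(data)                 # log transform, as in the snippet above

init_clusters = 200                       # large initial cluster count per the paper
centers = log_data[rng.choice(len(log_data), init_clusters, replace=False)]

_, ids = kmeans2(log_data, centers, minit='matrix')
# ids[i] is the index of the initial cluster assigned to log_data[i];
# the snippet above then merges these clusters down to the requested k.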
Example #4
def delete_instances(instances):
    """Call regionInstances.bulkInsert to create instances"""
    if len(instances) == 0:
        return
    invalid, valid = separate(lambda inst: bool(lkp.instance(inst)), instances)
    log.debug("instances do not exist: {}".format(",".join(invalid)))

    if lkp.cfg.enable_reconfigure:
        count = len(valid)
        hostlist = util.to_hostlist(valid)
        log.info("delete {} subscriptions ({})".format(count, hostlist))
        execute_with_futures(subscription_delete, valid)

    requests = {inst: delete_instance_request(inst) for inst in valid}
    done, failed = batch_execute(requests)
    if failed:
        failed_nodes = [f"{n}: {e}" for n, (_, e) in failed.items()]
        node_str = "\n".join(str(el) for el in truncate_iter(failed_nodes, 5))
        log.error(f"some nodes failed to delete: {node_str}")
    wait_for_operations(done.values())
Example #5
def main(nodelist, job_id, force=False):
    """main called when run as script"""
    log.debug(f"main {nodelist} {job_id}")
    # nodes are split between normal and exclusive
    # exclusive nodes are handled by PrologSlurmctld
    nodes = expand_nodelist(nodelist)
    if force:
        exclusive = normal = nodes
        prelog = "force "
    else:
        normal, exclusive = separate(is_exclusive_node, nodes)
        prelog = ""
    if job_id is None or force:
        if normal:
            hostlist = util.to_hostlist(normal)
            log.info(f"{prelog}resume {hostlist}")
            resume_nodes(normal)
    else:
        if exclusive:
            hostlist = util.to_hostlist(exclusive)
            log.info(f"{prelog}exclusive resume {hostlist} {job_id}")
            prolog_resume_nodes(job_id, exclusive)
Example #6
    def parse_text(self,
                   text,
                   default_speaker,
                   separate_comma=False,
                   n_gram=2,
                   separate_sentence=False,
                   parse_speaker=True,
                   normalize=True):
        """
        Parse the input text into suitable data structure
        :param n_gram: concat sentences of this max length in a line
        :param text: source
        :param default_speaker: the default speaker if no speaker in specified
        :param separate_comma: split by comma
        :param separate_sentence: split sentence if multiple clauses exist
        :param parse_speaker: bool for turn on/off parse speaker
        :param normalize: to convert common punctuation besides comma to comma
        """

        lines = re.split(r'\r\n|\n\r|\r|\n', text)

        line_speaker_dict = {}
        # TODO: allow speakers not in model_list and force-replace them later
        if parse_speaker:
            # re.match(r'^.*(?=:)', text)
            for i, line in enumerate(lines):
                if re.search(r':|\|', line):
                    # ?: non capture group of : and |
                    speaker, line = re.split(r'\s*(?::|\|)\s*', line, 1)
                    # add an entry only if the voice model exists in the folder;
                    # unrecognized speakers are changed to the default later on
                    if speaker in self.model_list:
                        line_speaker_dict[i] = speaker
                    lines[i] = line

        if normalize:
            lines = [normalize_text(line) for line in lines]

        # separate by spacy sentencizer
        lines = [
            separate(line, n_gram, comma=separate_comma) for line in lines
        ]

        sentence_dicts = []
        for i, line in enumerate(lines):
            for j, sent in enumerate(line):
                if sentence_dicts:
                    if sent[0].is_punct and not any(sent[0].text == punct
                                                    for punct in ['“', '‘']):
                        sentence_dicts[-1][
                            'punct'] = sentence_dicts[-1]['punct'] + sent.text
                        continue
                sentence_dict = {
                    'text': sent.text,
                    'begin': True if j == 0 else False,
                    'punct': '',
                    'speaker': line_speaker_dict.get(
                        i, self.model_list[default_speaker])
                }

                while not sentence_dict['text'][-1].isalpha():
                    sentence_dict['punct'] = sentence_dict[
                        'punct'] + sentence_dict['text'][-1]
                    sentence_dict['text'] = sentence_dict['text'][:-1]
                # Reverse the punctuation order since I add it based on the last item
                sentence_dict['punct'] = sentence_dict['punct'][::-1]
                sentence_dict[
                    'text'] = sentence_dict['text'] + sentence_dict['punct']
                sentence_dicts.append(sentence_dict)

        speaker_dict = {}
        for i, sentence_dict in enumerate(sentence_dicts):
            if sentence_dict['speaker'] not in speaker_dict:
                speaker_dict[sentence_dict['speaker']] = []
            speaker_dict[sentence_dict['speaker']].append(i)
        self.speaker_dict = speaker_dict
        self.sentence_dicts = sentence_dicts
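
For reference, the structures this method builds look roughly like the following; the values are made up purely to illustrate the fields ('text' ends up with its trailing punctuation re-attached, 'punct' holds that punctuation separately, and speaker_dict maps each speaker to the indices of their sentences).

# Illustrative (made-up) result of parsing two lines, "alice: Hello there." and
# "bob: Hi. How are you?":
sentence_dicts = [
    {'text': 'Hello there.', 'begin': True,  'punct': '.', 'speaker': 'alice'},
    {'text': 'Hi.',          'begin': True,  'punct': '.', 'speaker': 'bob'},
    {'text': 'How are you?', 'begin': False, 'punct': '?', 'speaker': 'bob'},
]
speaker_dict = {'alice': [0], 'bob': [1, 2]}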
Example #7
    def parse_text(self,
                   text,
                   default_speaker,
                   force_parse=False,
                   separate_comma=False,
                   n_gram=2,
                   separate_sentence=False,
                   parse_speaker=True,
                   normalize=True):
        """
        Parse the input text into suitable data structure
        :param force_parse: forced to replace all speaker that are not in model list as the default speaker
        :param n_gram: concat sentences of this max length in a line
        :param text: source
        :param default_speaker: the default speaker if no speaker in specified
        :param separate_comma: split by comma
        :param separate_sentence: split sentence if multiple clauses exist
        :param parse_speaker: bool for turn on/off parse speaker
        :param normalize: to convert common punctuation besides comma to comma
        """

        lines = re.split(r'\r\n|\n\r|\r|\n', text)

        line_speaker_dict = {}
        self.speaker_list = []
        self.speaker_map_dict = {}
        if parse_speaker:
            # re.match(r'^.*(?=:)', text)
            for i, line in enumerate(lines):
                if re.search(r':|\|', line):
                    # ?: non capture group of : and |
                    speaker, line = re.split(r'\s*(?::|\|)\s*', line, 1)
                    # add an entry only if the voice model exists in the folder;
                    # unrecognized speakers will need to be mapped so they can be synthesized
                    if force_parse:
                        if speaker in self.model_list:
                            line_speaker_dict[i] = speaker
                    else:
                        if speaker not in self.speaker_list:
                            self.speaker_list.append(speaker)
                        line_speaker_dict[i] = speaker
                    lines[i] = line

            for i, speaker in enumerate(self.speaker_list):
                if speaker not in self.model_list:
                    self.speaker_map_dict[speaker] = self.model_list[i % len(
                        self.model_list)]

        # separate by spacy sentencizer
        lines = [
            separate(fix_text(line), n_gram, comma=separate_comma)
            for line in lines
        ]

        self.sentence_dicts = []
        for i, line in enumerate(lines):
            for j, sent in enumerate(line):
                if self.sentence_dicts:
                    # might be buggy, forgot why I wrote this at all
                    while sent[0].is_punct and not any(
                            sent[0].text == punct for punct in ['“', '‘']):
                        self.sentence_dicts[-1]['punct'] = self.sentence_dicts[
                            -1]['punct'] + sent.text[0]
                        sent = sent[1:]
                        continue

                sentence_dict = {
                    'text': sent.text,
                    'begin': True if j == 0 else False,
                    'punct': '',
                    'speaker': line_speaker_dict.get(i, default_speaker)
                }

                while not sentence_dict['text'][-1].isalpha():
                    sentence_dict['punct'] = sentence_dict[
                        'punct'] + sentence_dict['text'][-1]
                    sentence_dict['text'] = sentence_dict['text'][:-1]
                # Reverse the punctuation order since I add it based on the last item
                sentence_dict['punct'] = sentence_dict['punct'][::-1]
                sentence_dict[
                    'text'] = sentence_dict['text'] + sentence_dict['punct']
                self.sentence_dicts.append(sentence_dict)

        self.update_speaker_dict()
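
The main difference from the previous version is that speakers missing from model_list are no longer silently replaced by the default (unless force_parse is set); instead they are collected in speaker_list and mapped round-robin onto the available voice models. Illustratively, with made-up names:

# With model_list = ['model_a', 'model_b'] and three parsed speakers that are
# not in it, the mapping built above becomes:
speaker_list = ['narrator', 'alice', 'bob']
model_list = ['model_a', 'model_b']
speaker_map_dict = {
    'narrator': 'model_a',   # index 0 % 2 -> model_list[0]
    'alice': 'model_b',      # index 1 % 2 -> model_list[1]
    'bob': 'model_a',        # index 2 % 2 -> model_list[0]
}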
Example #8
 def __init__(self, parent, isolate=False):
     """Dialog listing each cluster's color, share of total events, and event
     count; with isolate=True it adds checkboxes for choosing clusters to isolate."""
     hGap = 20
     vGap = 10
     clusterIDs = DataStore.getCurrentDataSet().getCurrentClustering()
     clustering = separate(DataStore.getCurrentDataSet().data, clusterIDs)
     numClusters = len(clustering)
     numColumns = 3
     self.radioList = []
     self.isolate = isolate
     
     #TODO: still not perfect, larger cluster sizes give an increasing space
     #      at the bottom
     # The magic # includes the header width, the vgap for it, and the widths and
     # border pads for the two sizers
     dlgWidth = 275
     dlgHeight = ((numClusters+1) * (vGap+20)) + 100
     if isolate:
         dlgWidth += 50
         #dlgHeight += 20
         
     title = 'Clustering Info'
     if (isolate):
         title = 'Isolate clusters'
     wx.Dialog.__init__(self, parent, wx.ID_ANY, title, size=(dlgWidth, dlgHeight))
     self.CenterOnParent()
     
     
     # create main data display sizer
     # one row for each cluster plus header row
     # cols: cluster color, % of total
     
     self.formSizer = None
     if isolate:
         self.formSizer = wx.FlexGridSizer(numClusters+1, numColumns+1, hgap=hGap, vgap=vGap)
     else:
         self.formSizer = wx.FlexGridSizer(numClusters, numColumns, hgap=hGap, vgap=vGap)
     # header row
     if isolate:
         self.formSizer.Add(wx.StaticText(self, -1, 'Select', (5,10)), 1, wx.EXPAND)
     self.formSizer.Add(wx.StaticText(self, -1, 'Cluster', (5, 10)), 1, wx.EXPAND)
     self.formSizer.Add(wx.StaticText(self, -1, '% of Total', (20, 10)), 1, wx.EXPAND)
     self.formSizer.Add(wx.StaticText(self, -1, '# of Events', (20, 10)), 1, wx.EXPAND)
     # data rows
     for i in range(len(clustering)):
         cluster = clustering[i]
         if isolate:
             self.radioList.append(wx.CheckBox(self, wx.ID_ANY))
             self.formSizer.Add(self.radioList[i], 0, wx.EXPAND)
         # cluster color box
         label = wx.StaticText(self, -1, '', (20, 10))
         label.SetBackgroundColour(plot.methods.plotColors[i])
         self.formSizer.Add(label, 1, wx.EXPAND)
         # % of total
         percent = float(len(cluster))/len(DataStore.getCurrentDataSet().data)*100
         label = wx.StaticText(self, -1, '%6.2f' % percent + ' %', (30, 10))
         self.formSizer.Add(label, 1, wx.EXPAND | wx.ALIGN_CENTER)
         # number of events
         label = wx.StaticText(self, -1, str(len(cluster)), (30, 10))
         self.formSizer.Add(label, 1, wx.EXPAND | wx.ALIGN_CENTER)        
     
     # create the button row
     self.buttonSizer = None
     if isolate:
         self.buttonSizer = self.CreateButtonSizer(wx.OK | wx.CANCEL)
     else:
         self.buttonSizer = self.CreateButtonSizer(wx.OK)
     
     
     # main sizer
     self.sizer = wx.BoxSizer(wx.VERTICAL)
     self.sizer.Add(self.formSizer, 1, wx.EXPAND | wx.LEFT | wx.RIGHT | wx.TOP, 20)
     self.sizer.Add(self.buttonSizer, 0, wx.EXPAND | wx.LEFT | wx.RIGHT | wx.TOP | wx.BOTTOM, 20)
     self.SetSizer(self.sizer)
Example #9
def resume_nodes(nodelist, placement_groups=None, exclusive=False):
    """resume nodes in nodelist"""

    def ident_key(n):
        # ident here will refer to the combination of partition and group
        return "-".join(
            (
                lkp.node_partition_name(n),
                lkp.node_group_name(n),
            )
        )

    # support already expanded list
    nodes = nodelist
    if isinstance(nodes, str):
        nodelist = expand_nodelist(nodelist)

    nodes = sorted(nodelist, key=ident_key)
    if len(nodes) == 0:
        return
    grouped_nodes = {
        ident: chunk
        for ident, nodes in groupby(nodes, ident_key)
        for chunk in chunked(nodes, n=BULK_INSERT_LIMIT)
    }
    log.debug(f"grouped_nodes: {grouped_nodes}")

    # make all bulkInsert requests and execute with batch
    inserts = {
        ident: create_instances_request(nodes, placement_groups, exclusive)
        for ident, nodes in grouped_nodes.items()
    }
    started, failed = batch_execute(inserts)
    if failed:
        failed_reqs = [f"{e}" for _, (_, e) in failed.items()]
        log.error("bulkInsert API failures: {}".format("\n".join(failed_reqs)))
        for ident, (_, exc) in failed.items():
            down_nodes(grouped_nodes[ident], exc._get_reason())

    # wait for all bulkInserts to complete and log any errors
    bulk_operations = [wait_for_operation(op) for op in started.values()]
    for bulk_op in bulk_operations:
        if "error" in bulk_op:
            error = bulk_op["error"]["errors"][0]
            log.error(
                f"bulkInsert operation error: {error['code']} operationName:'{bulk_op['name']}'"
            )

    # Fetch all insert operations from all bulkInserts. Group by error code and log
    successful_inserts, failed_inserts = separate(
        lambda op: "error" in op, get_insert_operations(bulk_operations)
    )
    # Apparently multiple errors are possible... so join with +.
    # grouped_inserts could be made into a dict, but it's not really necessary. Save some memory.
    grouped_inserts = util.groupby_unsorted(
        failed_inserts,
        lambda op: "+".join(err["code"] for err in op["error"]["errors"]),
    )
    for code, failed_ops in grouped_inserts:
        # at least one insert failure
        failed_nodes = [parse_self_link(op["targetLink"]).instance for op in failed_ops]
        hostlist = util.to_hostlist(failed_nodes)
        count = len(failed_nodes)
        log.error(
            f"{count} instances failed to start due to insert operation error: {code} ({hostlist})"
        )
        down_nodes(hostlist, code)
        if log.isEnabledFor(logging.DEBUG):
            msg = "\n".join(
                err["message"] for err in next(failed_ops)["error"]["errors"]
            )
            log.debug(f"{code} message from first node: {msg}")

    # If reconfigure enabled, create subscriptions for successfully started instances
    if lkp.cfg.enable_reconfigure and len(successful_inserts):
        started_nodes = [
            parse_self_link(op["targetLink"]).instance for op in successful_inserts
        ]
        count = len(started_nodes)
        hostlist = util.to_hostlist(started_nodes)
        log.info("create {} subscriptions ({})".format(count, hostlist))
        execute_with_futures(subscription_create, nodes)
Example #10
def partition_mounts(mounts):
    """partition into cluster-external and internal mounts"""
    def internal_mount(mount):
        return mount.server_ip == lkp.control_host

    return separate(internal_mount, mounts)