Example #1
    def log(self, input_count, batch_count, additional_values):
        logdict = OrderedDict()
        delta_t = time.time() - self.last_time
        delta_count = input_count - self.last_input_count
        self.last_time = time.time()
        self.last_input_count = input_count

        logdict['time_spent'] = delta_t
        logdict['cumulative_time_spent'] = time.time() - self.start_time
        logdict['input_count'] = delta_count
        logdict['cumulative_input_count'] = input_count
        logdict['cumulative_batch_count'] = batch_count
        if delta_t > 0:
            logdict['inputs_per_sec'] = delta_count / delta_t
        else:
            logdict['inputs_per_sec'] = 0.0

        for k in sorted(viewkeys(additional_values)):
            logdict[k] = additional_values[k]

        # Write the headers if they are not written yet
        if self.headers is None:
            self.headers = list(viewkeys(logdict))
            self.logstr(",".join(self.headers))

        self.logstr(",".join(str(v) for v in viewvalues(logdict)))

        for logger in self.external_loggers:
            try:
                logger.log(logdict)
            except Exception as e:
                logging.warning(
                    "Failed to call ExternalLogger: {}".format(e))
Example #2
def prepare_graph(graph1, graph2):
    """Fix ids on graphs, match the <main> node
    Return hashable nodes1 and nodes2 from g1 and g2, respectively
    """
    fix_caller_id(graph1)
    fix_caller_id(graph2)
    nodes1 = [HashableDict(x) for x in graph1["nodes"]]
    nodes2 = [HashableDict(x) for x in graph2["nodes"]]
    graph1["hnodes"], graph2["hnodes"] = nodes1, nodes2
    graph1["node_indexes"] = set(range(len(nodes1)))
    graph2["node_indexes"] = set(range(len(nodes2)))
    graph1["levels"] = defaultdict(set)
    graph2["levels"] = defaultdict(set)
    for node in nodes1:
        graph1["levels"][node["node"]["level"]].add(node["index"])
    for node in nodes2:
        graph2["levels"][node["node"]["level"]].add(node["index"])
    if nodes1 and nodes2:
        if nodes1[0]['name'] != nodes2[0]['name']:
            nodes1[0]["name"] = "<main>"
            nodes2[0]["name"] = "<main>"
        graph1["max_level"] = max(viewkeys(graph1["levels"]))
        graph2["max_level"] = max(viewkeys(graph2["levels"]))
    else:
        graph1["max_level"] = -1
        graph2["max_level"] = -1

    return nodes1, nodes2
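The levels index maps each level number to the set of node indexes found at that level. A toy sketch of the same grouping with made-up nodes:

from collections import defaultdict

nodes = [{"index": 0, "node": {"level": 0}},
         {"index": 1, "node": {"level": 1}},
         {"index": 2, "node": {"level": 1}}]
levels = defaultdict(set)
for node in nodes:
    levels[node["node"]["level"]].add(node["index"])
assert dict(levels) == {0: {0}, 1: {1, 2}}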
Example #4
def prepare_graph(graph1, graph2):
    """Fix ids on graphs, match the <main> node
    Return hashable nodes1 and nodes2 from g1 and g2, respectively
    """
    fix_caller_id(graph1)
    fix_caller_id(graph2)
    nodes1 = [HashableDict(x) for x in graph1["nodes"]]
    nodes2 = [HashableDict(x) for x in graph2["nodes"]]
    graph1["hnodes"], graph2["hnodes"] = nodes1, nodes2
    graph1["node_indexes"] = set(range(len(nodes1)))
    graph2["node_indexes"] = set(range(len(nodes2)))
    graph1["levels"] = defaultdict(set)
    graph2["levels"] = defaultdict(set)
    for node in nodes1:
        graph1["levels"][node["node"]["level"]].add(node["index"])
    for node in nodes2:
        graph2["levels"][node["node"]["level"]].add(node["index"])
    if nodes1 and nodes2:
        nodes1[0]["name"] = "<main>"
        nodes2[0]["name"] = "<main>"
        graph1["max_level"] = max(viewkeys(graph1["levels"]))
        graph2["max_level"] = max(viewkeys(graph2["levels"]))
    else:
        graph1["max_level"] = -1
        graph2["max_level"] = -1

    return nodes1, nodes2
Example #5
    def testFilterInclExclDirectories(self):
        "Test MSUnmerged with including and excluding directories filter"
        toDeleteDict = {
            "/store/unmerged/express/prod/2020/1/12": [
                "/store/unmerged/express/prod/2020/1/12/log8.tar",
                "/store/unmerged/express/prod/2020/1/12/log9.tar"
            ]
        }
        rseData = getBasicRSEData()
        self.msUnmerged.msConfig['dirFilterIncl'] = [
            "/store/unmerged/data/prod/2018/", "/store/unmerged/express"
        ]
        self.msUnmerged.msConfig['dirFilterExcl'] = [
            "/store/unmerged/logs", "/store/unmerged/data/prod",
            "/store/unmerged/alan/prod"
        ]
        self.msUnmerged.protectedLFNs = set()
        filterData = self.msUnmerged.filterUnmergedFiles(rseData)
        self.assertEqual(filterData['counters']['dirsToDeleteAll'], 1)
        self.assertItemsEqual(viewkeys(filterData['files']['toDelete']),
                              viewkeys(toDeleteDict))
        self.assertItemsEqual(
            list(filterData['files']['toDelete']
                 ['/store/unmerged/express/prod/2020/1/12']),
            toDeleteDict['/store/unmerged/express/prod/2020/1/12'])
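assertItemsEqual (an order-insensitive, count-sensitive comparison) only exists in Python 2's unittest; under Python 3 the same assertion is spelled assertCountEqual. A small illustrative test:

import unittest

class UnorderedComparison(unittest.TestCase):
    def test_unordered(self):
        # same elements with the same multiplicities, order ignored
        self.assertCountEqual(["a", "b", "b"], ["b", "a", "b"])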
Example #6
def main():
    """
    Expects a dataset name as input argument.
    It then queries Rucio and DBS and compares their blocks and
    number of files.
    """
    if len(sys.argv) != 2:
        print("A dataset name must be provided in the command line")
        sys.exit(1)
    datasetName = sys.argv[1]

    logger = loggerSetup(logging.INFO)

    rucioOutput = getFromRucio(datasetName, logger)
    dbsOutput, dbsFilesCounter = getFromDBS(datasetName, logger)

    logger.info("*** Dataset: %s", datasetName)
    logger.info("Rucio file count : %s", sum(viewvalues(rucioOutput)))
    logger.info("DBS file count   : %s",
                dbsFilesCounter['valid'] + dbsFilesCounter['invalid'])
    logger.info(" - valid files   : %s", dbsFilesCounter['valid'])
    logger.info(" - invalid files : %s", dbsFilesCounter['invalid'])
    logger.info("Blocks in Rucio but not in DBS: %s",
                set(viewkeys(rucioOutput)) - set(viewkeys(dbsOutput)))
    logger.info("Blocks in DBS but not in Rucio: %s",
                set(viewkeys(dbsOutput)) - set(viewkeys(rucioOutput)))

    for blockname in rucioOutput:
        if blockname not in dbsOutput:
            logger.error("This block does not exist in DBS: %s", blockname)
            continue
        if rucioOutput[blockname] != sum(viewvalues(dbsOutput[blockname])):
            logger.warning("Block with file mismatch: %s", blockname)
            logger.warning("\tRucio: %s\t\tDBS: %s", rucioOutput[blockname],
                           sum(viewvalues(dbsOutput[blockname])))
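The block comparison relies on set difference over the two dicts' keys. Dictionary views already behave as sets, so the explicit set(...) wrapping is optional; a toy sketch:

from future.utils import viewkeys

rucioOutput = {"blockA": 2, "blockB": 3}
dbsOutput = {"blockA": {"f1": 2}}
only_in_rucio = viewkeys(rucioOutput) - viewkeys(dbsOutput)
assert only_in_rucio == {"blockB"}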
Example #7
def oob_list(session, mode, *args, **kwargs):
    """
    Called with the `LIST <MODE>`  MSDP command.

    Args:
        session (Session): The Session asking for the information
        mode (str): The available properties. One of
            "COMMANDS"               Request an array of commands supported
                                     by the server.
            "LISTS"                  Request an array of lists supported
                                     by the server.
            "CONFIGURABLE_VARIABLES" Request an array of variables the client
                                     can configure.
            "REPORTABLE_VARIABLES"   Request an array of variables the server
                                     will report.
            "REPORTED_VARIABLES"     Request an array of variables currently
                                     being reported.
            "SENDABLE_VARIABLES"     Request an array of variables the server
                                     will send.
    Examples:
        oob in: LIST COMMANDS
        oob out: (COMMANDS, (SEND, REPORT, LIST, ...))

    """
    mode = mode.upper()
    if mode == "COMMANDS":
        session.msg(oob=("COMMANDS", ("LIST",
                                     "REPORT",
                                     "UNREPORT",
                                     # "RESET",
                                     "SEND")))
    elif mode == "REPORTABLE_VARIABLES":
        session.msg(oob=("REPORTABLE_VARIABLES", tuple(key for key in viewkeys(OOB_REPORTABLE))))
    elif mode == "REPORTED_VARIABLES":
        # we need to check so as to use the right return value depending on if it is
        # an Attribute (identified by tracking the db_value field) or a normal database field
        # reported is a list of tuples (obj, propname, args, kwargs)
        reported = OOB_HANDLER.get_all_monitors(session)
        reported = [rep[0].key if rep[1] == "db_value" else rep[1] for rep in reported]
        session.msg(oob=("REPORTED_VARIABLES", reported))
    elif mode == "SENDABLE_VARIABLES":
        session.msg(oob=("SENDABLE_VARIABLES", tuple(key for key in viewkeys(OOB_REPORTABLE))))
    elif mode == "CONFIGURABLE_VARIABLES":
        # Not implemented (game specific)
        oob_error(session, "Not implemented (game specific)")
    else:
        # mode == "LISTS" or not given
        session.msg(oob=("LISTS",("REPORTABLE_VARIABLES",
                                  "REPORTED_VARIABLES",
                                  # "CONFIGURABLE_VARIABLES",
                                  "SENDABLE_VARIABLES")))
Example #8
    def test__iter_stat_sources(self):
        st = fbstat.StatStorage("test/fess/data/test1.stats", [
            "test/fess/data/fallback1.stats", "test/fess/data/fallback2.stats"
        ])
        source_iter = st._iter_stat_sources()
        first = next(source_iter)
        self.assertEqual(viewkeys(first["stem"]), {5})
        self.assertEqual(len(first["stem"][5]), 1)
        second = next(source_iter)
        self.assertEqual(viewkeys(second["stem"]), {6})
        self.assertEqual(len(second["stem"][6]), 1)
        third = next(source_iter)
        self.assertEqual(viewkeys(third["stem"]), {5, 10})
        self.assertEqual(len(third["stem"][5]), 2)
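The assertions work because a dict view compares equal to a plain set containing the same elements. A minimal demonstration with a made-up mapping:

from future.utils import viewkeys

stem = {5: ["entry1"], 10: ["entry2"]}
assert viewkeys(stem) == {5, 10}   # view-to-set equality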
Example #9
def calcDistMatchArr(matchArr, tKey, mKey):
    """Calculate the euclidean distance of all array positions in "matchArr".

    :param matchArr: a dictionary of ``numpy.arrays`` containing at least two
        entries that are treated as cartesian coordinates.
    :param tKey: #TODO: docstring
    :param mKey: #TODO: docstring

    :returns: #TODO: docstring

            {'eucDist': numpy.array([eucDistance, eucDistance, ...]),
             'posPairs': numpy.array([[pos1, pos2], [pos1, pos2], ...])
             }
    """
    # Calculate a sorted list of all euclidean feature distances
    matchArrSize = listvalues(matchArr)[0].size

    distInfo = {'posPairs': list(), 'eucDist': list()}
    _matrix = numpy.swapaxes(numpy.array([matchArr[tKey], matchArr[mKey]]), 0,
                             1)

    for pos1 in range(matchArrSize - 1):
        for pos2 in range(pos1 + 1, matchArrSize):
            distInfo['posPairs'].append((pos1, pos2))
    distInfo['posPairs'] = numpy.array(distInfo['posPairs'])
    distInfo['eucDist'] = scipy.spatial.distance.pdist(_matrix)

    distSort = numpy.argsort(distInfo['eucDist'])
    for key in list(viewkeys(distInfo)):
        distInfo[key] = distInfo[key][distSort]

    return distInfo
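scipy.spatial.distance.pdist returns the condensed pairwise distance vector in exactly the (pos1, pos2) order that the nested loops generate, which is why a single argsort can reorder both arrays consistently. A self-contained sketch with three toy points:

import numpy
import scipy.spatial.distance

pts = numpy.array([[0.0, 0.0], [3.0, 4.0], [0.0, 1.0]])
eucDist = scipy.spatial.distance.pdist(pts)          # distances for (0,1), (0,2), (1,2)
posPairs = numpy.array([(0, 1), (0, 2), (1, 2)])
order = numpy.argsort(eucDist)
eucDist, posPairs = eucDist[order], posPairs[order]  # both sorted by distance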
Example #10
def InitFromLSTMParams(lstm_pblobs, param_values):
    '''
    Set the parameters of LSTM based on predefined values
    '''
    weight_params = GetLSTMParamNames()['weights']
    bias_params = GetLSTMParamNames()['biases']
    for input_type in viewkeys(param_values):
        weight_values = [
            param_values[input_type][w].flatten()
            for w in weight_params
        ]
        wmat = np.array([])
        for w in weight_values:
            wmat = np.append(wmat, w)
        bias_values = [
            param_values[input_type][b].flatten()
            for b in bias_params
        ]
        bm = np.array([])
        for b in bias_values:
            bm = np.append(bm, b)

        weights_blob = lstm_pblobs[input_type]['weights']
        bias_blob = lstm_pblobs[input_type]['biases']
        cur_weight = workspace.FetchBlob(weights_blob)
        cur_biases = workspace.FetchBlob(bias_blob)

        workspace.FeedBlob(
            weights_blob,
            wmat.reshape(cur_weight.shape).astype(np.float32))
        workspace.FeedBlob(
            bias_blob,
            bm.reshape(cur_biases.shape).astype(np.float32))
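Growing wmat with np.append inside a loop copies the whole array on every iteration, so the concatenation is quadratic; np.concatenate over the flattened pieces produces the same result in one pass. A toy equivalent:

import numpy as np

weight_values = [np.ones((2, 2)), np.zeros(3)]
wmat = np.concatenate([w.flatten() for w in weight_values])
# identical to repeatedly np.append-ing onto an empty array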
Example #12
    def __lt__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented

        if self.quality < other.quality:
            return True
        elif self.quality > other.quality:
            return False

        if self.type != other.type:
            return self.type is STAR

        if self.subtype != other.subtype:
            return self.subtype is STAR

        return viewkeys(self.parameters) < viewkeys(other.parameters)
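The final tie-breaker works because dict views support the set comparison operators, so < tests whether one set of parameter names is a proper subset of the other. A toy demonstration:

from future.utils import viewkeys

params_a = {"q": "0.5"}
params_b = {"q": "0.5", "level": "1"}
assert viewkeys(params_a) < viewkeys(params_b)   # proper subset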
Example #13
    def __init__(self, url, dispatcher=None, agent=None, logger_name=__name__):
        super(ServiceStatusClient, self).__init__(url)

        self._state_lock = Lock()
        self._rpc_lock = Lock()
        self._rpc_dispatcher = dispatcher
        self._queued_rpcs = {}

        self._logger = logging.getLogger(logger_name)
        self.services = {}
        self._name_map = {}
        self._on_change_callback = None

        # Register callbacks for all of the status notifications
        self.add_message_type(command_formats.ServiceStatusChanged,
                              self._on_status_change)
        self.add_message_type(command_formats.ServiceAdded,
                              self._on_service_added)
        self.add_message_type(command_formats.HeartbeatReceived,
                              self._on_heartbeat)
        self.add_message_type(command_formats.NewMessage, self._on_message)
        self.add_message_type(command_formats.NewHeadline, self._on_headline)
        self.add_message_type(command_formats.RPCCommand, self._on_rpc_command)
        self.add_message_type(command_formats.RPCResponse,
                              self._on_rpc_response)
        self.start()

        with self._state_lock:
            self.services = self.sync_services()
            for i, name in enumerate(viewkeys(self.services)):
                self._name_map[i] = name

        if agent is not None:
            self.register_agent(agent)
Example #14
    def __eq__(self, other):
        """Approximate numerical equality."""
        if not isinstance(other, type(self)):
            return NotImplemented

        for term in viewkeys(self.terms) | viewkeys(other.terms):
            if term in self.terms and term in other.terms:
                if not numpy.isclose(self.terms[term], other.terms[term]):
                    return False
            elif term in self.terms:
                if not numpy.isclose(self.terms[term], 0.0):
                    return False
            else:
                if not numpy.isclose(other.terms[term], 0.0):
                    return False
        return True
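The union of the two key views visits every term that appears in either operand exactly once, with missing terms treated as 0.0. The same loop over plain dicts (hypothetical coefficients):

import numpy
from future.utils import viewkeys

terms_a = {"x": 1.0}
terms_b = {"x": 1.0 + 1e-12, "y": 1e-13}
equal = all(numpy.isclose(terms_a.get(t, 0.0), terms_b.get(t, 0.0))
            for t in viewkeys(terms_a) | viewkeys(terms_b))
assert equal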
Example #15
def update(dic1, dic2):
    '''update dic1 with dic2 recursively'''
    dickeys = {k: dic2.pop(k) for k in viewkeys(dic1)
               if isinstance(dic1[k], dict) and isinstance(dic2.get(k, None), dict)}
    dic1.update(dic2)
    for k in dickeys:
        update(dic1[k], dickeys[k])
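A short usage sketch with made-up dicts, using update as defined above. Note that both arguments are mutated: nested dicts are popped out of dic2 before the flat update:

d1 = {"a": {"x": 1}, "b": 2}
d2 = {"a": {"y": 3}, "b": 4}
update(d1, d2)
assert d1 == {"a": {"x": 1, "y": 3}, "b": 4}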
Example #16
def clean_object_caches(obj):
    """
    Clean all object caches on the given object.

    Args:
        obj (Object instance): An object whose caches to clean.

    Notes:
        This is only the contents cache these days.

    """
    global _TYPECLASSMODELS, _OBJECTMODELS
    if not _TYPECLASSMODELS:
        from evennia.typeclasses import models as _TYPECLASSMODELS

    if not obj:
        return
    # contents cache
    try:
        _SA(obj, "_contents_cache", None)
    except AttributeError:
        pass

    # on-object property cache
    # materialize the key list first: _DA deletes attributes, which would
    # mutate obj.__dict__ while the view is being iterated
    for cname in [cname for cname in viewkeys(obj.__dict__)
                  if cname.startswith("_cached_db_")]:
        _DA(obj, cname)
    try:
        hashid = _GA(obj, "hashid")
        _TYPECLASSMODELS._ATTRIBUTE_CACHE[hashid] = {}
    except AttributeError:
        pass
Example #17
def ShiftActivationDevices(model, activations, shifts):
    '''
    Function to enable simple model-parallelism for data_parallel_model
    models. 'shifts' is a dictionary from_gpu -> to_gpu, and activations is
    a list of activation blobs (without the gpu_x/ prefix -- use GetActivationBlobs()).

    Operators handling these activations are shifted to the gpu declared in
    'shifts'. Also related operators such as gradient operators will be moved.
    Appropriate copy-ops are inserted.

    This allows shifting memory usage from one gpu to another, enabling bigger
    models to be trained.
    '''
    assert set(viewvalues(shifts)).intersection(set(viewkeys(shifts))) == set()
    for from_device, to_device in viewitems(shifts):
        log.info(
            "Shifting {} activations from {} --> {}".
            format(len(activations), from_device, to_device)
        )
        _ShiftActivationDevices(model, activations, from_device, to_device)

    param_init_net, blob_to_device = core.InjectCrossDeviceCopies(model.param_init_net)
    net, _blob_to_device = core.InjectCrossDeviceCopies(model.net, blob_to_device)
    model.param_init_net = param_init_net
    model.net = net
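The assert guarantees that no GPU id appears both as a source and as a destination, which would make the order of shifting ambiguous. The same check on a toy shifts dict:

from future.utils import viewkeys, viewvalues

shifts = {0: 2, 1: 3}   # from_gpu -> to_gpu
assert set(viewvalues(shifts)).intersection(set(viewkeys(shifts))) == set()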
Example #18
def calcDistMatchArr(matchArr, tKey, mKey):
    """Calculate the euclidean distance of all array positions in "matchArr".

    :param matchArr: a dictionary of ``numpy.arrays`` containing at least two
        entries that are treated as cartesian coordinates.
    :param tKey: #TODO: docstring
    :param mKey: #TODO: docstring

    :returns: #TODO: docstring

            {'eucDist': numpy.array([eucDistance, eucDistance, ...]),
             'posPairs': numpy.array([[pos1, pos2], [pos1, pos2], ...])
             }
    """
    # Calculate a sorted list of all euclidean feature distances
    matchArrSize = listvalues(matchArr)[0].size

    distInfo = {'posPairs': list(), 'eucDist': list()}
    _matrix = numpy.swapaxes(numpy.array([matchArr[tKey], matchArr[mKey]]), 0, 1)

    for pos1 in range(matchArrSize-1):
        for pos2 in range(pos1+1, matchArrSize):
            distInfo['posPairs'].append((pos1, pos2))
    distInfo['posPairs'] = numpy.array(distInfo['posPairs'])
    distInfo['eucDist'] = scipy.spatial.distance.pdist(_matrix)

    distSort = numpy.argsort(distInfo['eucDist'])
    for key in list(viewkeys(distInfo)):
        distInfo[key] = distInfo[key][distSort]

    return distInfo
Example #20
    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID
        """
        try:
            self.document = self.database.document(id=configID)
            if 'owner' in self.document:
                self.connectUserGroup(
                    groupname=self.document['owner'].get('group', None),
                    username=self.document['owner'].get('user', None))
            if '_attachments' in self.document:
                # Then we need to load the attachments
                for key in viewkeys(self.document['_attachments']):
                    self.loadAttachment(name=key)
        except CouchNotFoundError as ex:
            msg = "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        except Exception as ex:
            msg = "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        return
Example #21
def ShiftActivationDevices(model, activations, shifts):
    '''
    Function to enable simple model-parallelism for data_parallel_model
    models. 'shifts' is a dictionary from_gpu -> to_gpu, and activations is
    a list of activation blobs (without the gpu_x/ prefix -- use GetActivationBlobs()).

    Operators handling these activations are shifted to the gpu declared in
    'shifts'. Also related operators such as gradient operators will be moved.
    Appropriate copy-ops are inserted.

    This allows shifting memory usage from one gpu to another, enabling bigger
    models to be trained.
    '''
    assert set(viewvalues(shifts)).intersection(set(viewkeys(shifts))) == set()
    for from_device, to_device in viewitems(shifts):
        log.info("Shifting {} activations from {} --> {}".format(
            len(activations), from_device, to_device))
        _ShiftActivationDevices(model, activations, from_device, to_device)

    param_init_net, blob_to_device = core.InjectCrossDeviceCopies(
        model.param_init_net)
    net, _blob_to_device = core.InjectCrossDeviceCopies(
        model.net, blob_to_device)
    model.param_init_net = param_init_net
    model.net = net
Example #22
    def write(self, segment_id, result):  # result is surely not None
        csvwriter, isdict, seg_id_colname = \
            self.csvwriter, self.csvwriterisdict, SEGMENT_ID_COLNAME
        if csvwriter is None:  # instantiate writer according to first input
            isdict = self.csvwriterisdict = isinstance(result, dict)
            # write first column(s):
            if isdict:
                # we need to pass a list and not an iterable cause the iterable needs
                # to be consumed twice (the doc states differently, however...):
                fieldnames = [seg_id_colname]
                fieldnames.extend(viewkeys(result))
                csvwriter = self.csvwriter = csv.DictWriter(
                    self.outputfilehandle,
                    fieldnames=fieldnames,
                    **self.csvwriterkwargs)
                # write header if we need it (file does not exists, append is False, or
                # file exist, append=True but file has no row):
                if not self.append or self.outputfileempty:
                    csvwriter.writeheader()
            else:
                csvwriter = self.csvwriter = csv.writer(
                    self.outputfilehandle, **self.csvwriterkwargs)

        if isdict:
            result[seg_id_colname] = segment_id
        else:
            # we might have numpy arrays, we should support variable types (numeric, strings,..)
            res = [segment_id]
            res.extend(result)
            result = res

        csvwriter.writerow(result)
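The dict branch builds the csv.DictWriter field names once, from the first result, with the segment-id column prepended. A minimal standalone sketch of that setup (hypothetical column name and data, Python 3 I/O):

import csv
import io
from future.utils import viewkeys

SEGMENT_ID_COLNAME = "segment_id"   # hypothetical constant
result = {"amplitude": 1.5, "frequency": 2.0}

buf = io.StringIO()
fieldnames = [SEGMENT_ID_COLNAME]
fieldnames.extend(viewkeys(result))
writer = csv.DictWriter(buf, fieldnames=fieldnames)
writer.writeheader()
writer.writerow(dict(result, segment_id=42))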
Example #23
def write_gul_input_files(
        exposure_fp,
        keys_fp,
        target_dir,
        exposure_profile=get_default_exposure_profile(),
        oasis_files_prefixes={
            'items': 'items',
            'complex_items': 'complex_items',
            'coverages': 'coverages',
            'gulsummaryxref': 'gulsummaryxref'
        },
        write_inputs_table_to_file=False):
    """
    Writes the standard Oasis GUL input files, namely::

        items.csv
        coverages.csv
        gulsummaryxref.csv

    with the addition of a complex items file in case of a complex/custom model
    """
    # Clean the target directory path
    target_dir = as_path(target_dir,
                         'Target IL input files directory',
                         is_dir=True,
                         preexists=False)

    gul_inputs_df, exposure_df = get_gul_input_items(
        exposure_fp, keys_fp, exposure_profile=exposure_profile)

    if write_inputs_table_to_file:
        gul_inputs_df.to_csv(path_or_buf=os.path.join(target_dir,
                                                      'gul_inputs.csv'),
                             index=False,
                             encoding='utf-8',
                             chunksize=1000)

    if not gul_inputs_df[['model_data']].any().any():
        gul_inputs_df.drop(['model_data'], axis=1, inplace=True)
        if oasis_files_prefixes.get('complex_items'):
            # copy before popping so the shared default dict is not mutated
            oasis_files_prefixes = dict(oasis_files_prefixes)
            oasis_files_prefixes.pop('complex_items')

    gul_input_files = {
        k: os.path.join(target_dir, '{}.csv'.format(oasis_files_prefixes[k]))
        for k in viewkeys(oasis_files_prefixes)
    }

    concurrent_tasks = (Task(getattr(sys.modules[__name__],
                                     'write_{}_file'.format(f)),
                             args=(
                                 gul_inputs_df.copy(deep=True),
                                 gul_input_files[f],
                             ),
                             key=f) for f in gul_input_files)
    num_ps = min(len(gul_input_files), multiprocessing.cpu_count())
    for _, _ in multithread(concurrent_tasks, pool_size=num_ps):
        pass

    return gul_input_files, gul_inputs_df, exposure_df
Example #24
def cleanup_bin_directory(directory):
    """
    Clean the tar and binary files.
    """
    for file in chain([TAR_FILE], (f + '.bin' for f in viewkeys(INPUT_FILES))):
        file_path = os.path.join(directory, file)
        if os.path.exists(file_path):
            os.remove(file_path)
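itertools.chain lets the single tar name and the generated .bin names be deleted in one loop. A toy sketch with hypothetical constants:

from itertools import chain
from future.utils import viewkeys

TAR_FILE = "inputs.tar"                        # hypothetical values
INPUT_FILES = {"items": {}, "coverages": {}}
names = list(chain([TAR_FILE], (f + '.bin' for f in viewkeys(INPUT_FILES))))
# e.g. ['inputs.tar', 'items.bin', 'coverages.bin']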
Example #25
File: api.py Project: 01-/dedupe
    def __init__(self, d, sample_size):
        if len(d) <= sample_size:
            super(Sample, self).__init__(d)
        else:
            super(Sample, self).__init__({k: d[k]
                                          for k
                                          in random.sample(viewkeys(d), sample_size)})
        self.original_length = len(d)
Example #26
    def __init__(self, d, sample_size, original_length):
        if len(d) <= sample_size:
            super(Sample, self).__init__(d)
        else:
            super(Sample, self).__init__(
                {k: d[k]
                 for k in random.sample(viewkeys(d), sample_size)})
        self.original_length = original_length
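One portability caveat: recent Python 3 releases require random.sample's population to be a sequence, so dict views (and sets) are rejected and the keys must be materialized first. A sketch of the same sampling that works on both sides:

import random

d = {k: k * k for k in range(100)}
sample_size = 5
keys = random.sample(list(d), sample_size)   # list() instead of a dict view
subsample = {k: d[k] for k in keys}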
Example #27
    def add_arguments(self):
        super(NowSetDefault, self).add_arguments()
        add_arg = self.add_argument
        add_arg("--model", type=str, default="*",
                choices=["*"] + list(viewkeys(MetaModel.__classes__)),
                help="""specifies the model""")
        add_arg("defaults", nargs=argparse.REMAINDER,
                help="Default assignments. Use the format var=value")
Example #28
    def visit_group(self, group):
        nodes = list(viewkeys(group.nodes))
        _group = Group()
        _group.use_id = False
        _group.initialize(nodes[1].visit(self), nodes[0].visit(self))
        for element in nodes[2:]:
            _group.add_subelement(element.visit(self))
        _group.level = group.level
        return _group
Example #29
def expectedLabelPosition(peptide, labelStateInfo, sequence=None,
                          modPositions=None):
    """Returns a modification description of a certain label state of a peptide.

    :param peptide: Peptide sequence used to calculate the expected label state
        modifications
    :param labelStateInfo: An entry of :attr:`LabelDescriptor.labels` that
        describes a label state
    :param sequence: unmodified amino acid sequence of :var:`peptide`, if None
        it is generated by :func:`maspy.peptidemethods.removeModifications()`
    :param modPositions: dictionary describing the modification state of
        "peptide", if None it is generated by
        :func:`maspy.peptidemethods.returnModPositions()`

    :returns: {sequence position: sorted list of expected label modifications
                  on that position, ...
               }
    """
    if modPositions is None:
        modPositions = maspy.peptidemethods.returnModPositions(peptide,
                                                               indexStart=0
                                                               )
    if sequence is None:
        sequence = maspy.peptidemethods.removeModifications(peptide)

    currLabelMods = dict()
    for labelPosition, labelSymbols in viewitems(labelStateInfo['aminoAcidLabels']):
        labelSymbols = aux.toList(labelSymbols)
        if labelSymbols == ['']:
            pass
        elif labelPosition == 'nTerm':
            currLabelMods.setdefault(0, list())
            currLabelMods[0].extend(labelSymbols)
        else:
            for sequencePosition in aux.findAllSubstrings(sequence,
                                                          labelPosition):
                currLabelMods.setdefault(sequencePosition, list())
                currLabelMods[sequencePosition].extend(labelSymbols)

    if labelStateInfo['excludingModifications'] is not None:
        for excludingMod, excludedLabelSymbol in viewitems(labelStateInfo['excludingModifications']):
            if excludingMod not in modPositions:
                continue
            for excludingModPos in modPositions[excludingMod]:
                if excludingModPos not in currLabelMods:
                    continue
                if excludedLabelSymbol not in currLabelMods[excludingModPos]:
                    continue
                if len(currLabelMods[excludingModPos]) == 1:
                    del currLabelMods[excludingModPos]
                else:
                    excludedModIndex = currLabelMods[excludingModPos].index(excludedLabelSymbol)
                    currLabelMods[excludingModPos].pop(excludedModIndex)

    for sequencePosition in list(viewkeys(currLabelMods)):
        currLabelMods[sequencePosition] = sorted(currLabelMods[sequencePosition])
    return currLabelMods
Example #31
def diff_dict(before, after):
    """Compare dicts.
    Return a dict with keys shared by both dicts that have different values
        key -> [before[key], after[key]]
    """
    result = OrderedDict()
    for key in viewkeys(before):
        if key != "id" and before[key] != after[key]:
            result[key] = [before[key], after[key]]
    return result
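A short usage sketch with toy dicts, using diff_dict as defined above. Note that the implementation indexes after[key] directly, so it assumes every key of before also exists in after:

before = {"id": 1, "name": "trial", "level": 2}
after = {"id": 9, "name": "trial", "level": 3}
assert diff_dict(before, after) == {"level": [2, 3]}   # "id" is ignored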
Example #33
    def generate_sample_xml(self, samples=None):
        """Generates the sample XML file

        Parameters
        ----------
        samples : list of str, optional
            The list of samples to be included in the sample xml. If not
            provided or an empty list is provided, all the samples are used

        Returns
        -------
        ET.Element
            Object with sample XML values
        """
        sample_set = ET.Element(
            'SAMPLE_SET', {
                'xmlns:xsi': self.xmlns_xsi,
                "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample"
            })

        if not samples:
            samples = viewkeys(self.samples)

        for sample_name in sorted(samples):
            sample_info = dict(self.samples[sample_name])
            sample = ET.SubElement(
                sample_set, 'SAMPLE', {
                    'alias': self._get_sample_alias(sample_name),
                    'center_name': qiita_config.ebi_center_name
                })

            sample_title = ET.SubElement(sample, 'TITLE')
            sample_title.text = escape(clean_whitespace(sample_name))

            sample_sample_name = ET.SubElement(sample, 'SAMPLE_NAME')
            taxon_id = ET.SubElement(sample_sample_name, 'TAXON_ID')
            text = sample_info.pop('taxon_id')
            taxon_id.text = escape(clean_whitespace(text))

            scientific_name = ET.SubElement(sample_sample_name,
                                            'SCIENTIFIC_NAME')
            text = sample_info.pop('scientific_name')
            scientific_name.text = escape(clean_whitespace(text))

            description = ET.SubElement(sample, 'DESCRIPTION')
            text = sample_info.pop('description')
            description.text = escape(clean_whitespace(text))

            if sample_info:
                sample_attributes = ET.SubElement(sample, 'SAMPLE_ATTRIBUTES')
                self._add_dict_as_tags_and_values(sample_attributes,
                                                  'SAMPLE_ATTRIBUTE',
                                                  sample_info)

        return sample_set
Example #34
    def _parse_port(self, port):
        if port is None or len(port) == 0:
            return

        if '@' in port:
            raise ArgumentError(
                "Configuration files are not yet supported as part of a port argument",
                port=port)

        pairs = port.split(';')
        for pair in pairs:
            name, _, value = pair.partition('=')
            if len(name) == 0 or len(value) == 0:
                continue

            name = name.strip()
            value = value.strip()

            if name == 'device':
                device_name = value
                if device_name in DEVICE_ALIASES:
                    device_name = DEVICE_ALIASES[device_name]
                if device_name in KNOWN_DEVICES:
                    self._default_device_info = KNOWN_DEVICES.get(device_name)
                else:
                    raise ArgumentError(
                        "Unknown device name or alias, please select from known_devices",
                        device_name=device_name,
                        known_devices=[x for x in viewkeys(DEVICE_ALIASES)])
            elif name == 'serial':
                self._jlink_serial = value
            elif name == 'mux':
                mux = value
                if mux in KNOWN_MULTIPLEX_FUNCS:
                    self._mux_func = KNOWN_MULTIPLEX_FUNCS[mux]
                else:
                    raise ArgumentError(
                        "Unknown multiplexer, please select from known_multiplex_funcs",
                        mux=mux,
                        known_multiplex_funcs=[
                            x for x in viewkeys(KNOWN_MULTIPLEX_FUNCS)
                        ])
Example #35
    def add_arguments(self):
        super(NowSetDefault, self).add_arguments()
        add_arg = self.add_argument
        add_arg("--model",
                type=str,
                default="*",
                choices=["*"] + list(viewkeys(MetaModel.__classes__)),
                help="""specifies the model""")
        add_arg("defaults",
                nargs=argparse.REMAINDER,
                help="Default assignments. Use the format var=value")
Example #36
    def test_output_and_bin_input_files_are_removed(self):
        with TemporaryDirectory() as d:
            Path(os.path.join(d, TAR_FILE)).touch()

            for f in viewvalues(INPUT_FILES):
                Path(os.path.join(d, f['name'] + '.bin')).touch()

            cleanup_bin_directory(d)

            self.assertFalse(os.path.exists(os.path.join(d, TAR_FILE)))
            for f in viewkeys(INPUT_FILES):
                self.assertFalse(os.path.exists(os.path.join(d, f + '.bin')))
Example #37
    def _trainClassifier(self, **kwargs):  # pragma: no cover
        labels = numpy.array(self.training_data['label'] == b'match',
                             dtype='int8')
        examples = self.training_data['distances']

        classifier_args = backport.signature(self.classifier.fit).parameters

        classifier_args = {k: kwargs[k]
                           for k
                           in viewkeys(kwargs) & classifier_args}

        self.classifier.fit(examples, labels, **classifier_args)
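The intersection viewkeys(kwargs) & classifier_args keeps only the keyword arguments that the classifier's fit method actually accepts. A standalone sketch using the stdlib inspect module (Python 3) and a hypothetical fit signature:

import inspect
from future.utils import viewkeys

def fit(X, y, max_depth=None, n_jobs=1):   # hypothetical signature
    pass

allowed = inspect.signature(fit).parameters
kwargs = {"max_depth": 3, "verbose": True}
classifier_args = {k: kwargs[k] for k in viewkeys(kwargs) & allowed}
assert classifier_args == {"max_depth": 3}   # unsupported keywords are dropped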
Example #38
    def generate_sample_xml(self, samples=None):
        """Generates the sample XML file

        Parameters
        ----------
        samples : list of str, optional
            The list of samples to be included in the sample xml. If not
            provided or an empty list is provided, all the samples are used

        Returns
        -------
        ET.Element
            Object with sample XML values
        """
        sample_set = ET.Element('SAMPLE_SET', {
            'xmlns:xsi': self.xmlns_xsi,
            "xsi:noNamespaceSchemaLocation": self.xsi_noNSL % "sample"})

        if not samples:
            samples = viewkeys(self.samples)

        for sample_name in sorted(samples):
            sample_info = dict(self.samples[sample_name])
            sample = ET.SubElement(sample_set, 'SAMPLE', {
                'alias': self._get_sample_alias(sample_name),
                'center_name': qiita_config.ebi_center_name}
            )

            sample_title = ET.SubElement(sample, 'TITLE')
            sample_title.text = escape(clean_whitespace(sample_name))

            sample_sample_name = ET.SubElement(sample, 'SAMPLE_NAME')
            taxon_id = ET.SubElement(sample_sample_name, 'TAXON_ID')
            text = sample_info.pop('taxon_id')
            taxon_id.text = escape(clean_whitespace(text))

            scientific_name = ET.SubElement(
                sample_sample_name, 'SCIENTIFIC_NAME')
            text = sample_info.pop('scientific_name')
            scientific_name.text = escape(clean_whitespace(text))

            description = ET.SubElement(sample, 'DESCRIPTION')
            text = sample_info.pop('description')
            description.text = escape(clean_whitespace(text))

            if sample_info:
                sample_attributes = ET.SubElement(sample, 'SAMPLE_ATTRIBUTES')
                self._add_dict_as_tags_and_values(sample_attributes,
                                                  'SAMPLE_ATTRIBUTE',
                                                  sample_info)

        return sample_set
Example #39
    def getSusceptibility(self, size=None):
        """
		Returns the susceptibility defined as:
		(Sum_{s != size(gc)} n_s * s * s) / (Sum_{s != size(gc)} n_s * s)
		Size is the number of nodes in the network. If it is given, it is assumed
		that communities of size 1 are not included in this community structure.
		If there is only 0 or 1 community, zero is returned.
		"""
        sd = self.getSizeDist()

        if len(sd) < 1:
            if size is None or size == 0:
                return 0.0
            else:
                return 1.0

        sizeSum = 0
        for key in viewkeys(sd):
            sizeSum += key * sd[key]

        # If no size is given, assume that also communities of size 1 are included
        if size is None:
            sus = 0
            size = sizeSum
        else:
            sus = size - sizeSum  #s=1
            assert (sus >= 0)

        #Remove largest component
        gc = max(viewkeys(sd))
        sd[gc] = 0

        #Calculate the susceptibility
        for key in viewkeys(sd):
            sus += key * key * sd[key]
        if (size - gc) == 0:
            return 0.0
        else:
            return float(sus) / float(size - gc)
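A worked toy example of the susceptibility formula from the docstring, with the giant component excluded (made-up size distribution):

sd = {2: 3, 5: 1}   # community size -> number of communities
gc = max(sd)        # 5, the giant component
num = sum(s * s * n for s, n in sd.items() if s != gc)   # 2*2*3 = 12
den = sum(s * n for s, n in sd.items() if s != gc)       # 2*3   = 6
susceptibility = float(num) / den                        # 2.0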
Example #40
def analyze_step(analyzer, step):
    proto = step.Proto()
    with analyzer.set_workspace(do_copy=proto.create_workspace):
        if proto.report_net:
            with analyzer.set_workspace(do_copy=True):
                analyzer(step.get_net(proto.report_net))
        all_new_blobs = set()
        substeps = step.Substeps() + [step.get_net(n) for n in proto.network]
        for substep in substeps:
            with analyzer.set_workspace(
                    do_copy=proto.concurrent_substeps) as ws_in:
                analyzer(substep)
                if proto.should_stop_blob:
                    analyzer.need_blob(proto.should_stop_blob)
            if proto.concurrent_substeps:
                new_blobs = set(viewkeys(ws_in)) - set(viewkeys(analyzer.workspace))
                assert len(all_new_blobs & new_blobs) == 0, (
                    'Error: Blobs created by multiple parallel steps: %s' % (
                        ', '.join(all_new_blobs & new_blobs)))
                all_new_blobs |= new_blobs
    for x in all_new_blobs:
        analyzer.define_blob(x)
Example #42
    def _parse_conn_string(self, conn_string):
        """Parse a connection string passed from 'debug -c' or 'connect_direct'
            Returns True if any settings changed in the debug port, which
            would require a jlink disconnection """
        disconnection_required = False
        """If device not in conn_string, set to default info"""
        if conn_string is None or 'device' not in conn_string:
            if self._default_device_info is not None and self._device_info != self._default_device_info:
                disconnection_required = True
                self._device_info = self._default_device_info

        if conn_string is None or len(conn_string) == 0:
            return disconnection_required

        if '@' in conn_string:
            raise ArgumentError(
                "Configuration files are not yet supported as part of a connection string argument",
                conn_string=conn_string)

        pairs = conn_string.split(';')
        for pair in pairs:
            name, _, value = pair.partition('=')
            if len(name) == 0 or len(value) == 0:
                continue

            name = name.strip()
            value = value.strip()

            if name == 'device':
                # default to the raw value so device_name is always bound
                device_name = DEVICE_ALIASES.get(value, value)
                if device_name in KNOWN_DEVICES:
                    device_info = KNOWN_DEVICES.get(device_name)
                    if self._device_info != device_info:
                        self._device_info = device_info
                        disconnection_required = True
                else:
                    raise ArgumentError(
                        "Unknown device name or alias, please select from known_devices",
                        device_name=value,
                        known_devices=[x for x in viewkeys(DEVICE_ALIASES)])
            elif name == 'channel':
                if self._mux_func is not None:
                    if self._channel != int(value):
                        self._channel = int(value)
                        disconnection_required = True
                else:
                    print(
                        "Warning: multiplexing architecture not selected, channel will not be set"
                    )
        return disconnection_required
Esempio n. 43
0
def _GroupByDevice(model, devices, params, non_data_params):
    '''
    Groups blobs by device, returning a map of [blobname] = {0: BlobRef, 1: ..}.
    Returns ordered dictionary, ensuring the original order.
    '''
    grouped = OrderedDict()
    # Only consider params that were created to be "data parallel"
    params = params[len(non_data_params):]
    assert len(params) % len(devices) == 0,\
           "There should be equal number of params per device"

    num_params_per_device = int(len(params) / len(devices))

    for i, p in enumerate(params):
        assert isinstance(p, core.BlobReference) or \
            isinstance(p, core.GradientSlice), \
            "Param {} is not BlobReference or GradientSlice".format(p)

        name = stripParamName(p)
        gpuid = devices[i // num_params_per_device]

        if isinstance(p, core.BlobReference):
            assert "{}_{}/".format(model._device_prefix, gpuid) in p.GetNameScope(),\
                "Param {} expected to have namescope '{}_{}'".format(str(p), model._device_prefix, gpuid)
        else:
            assert "{}_{}/".format(model._device_prefix, gpuid) in p.indices.GetNameScope(),\
                "Indices {} expected to have namescope '{}_{}'".format(str(p), model._device_prefix, gpuid)
            assert "{}_{}/".format(model._device_prefix, gpuid) in p.values.GetNameScope(),\
                "Values {} expected to have namescope '{}_{}'".format(str(p), model._device_prefix, gpuid)

        if name not in grouped:
            grouped[name] = {}
        grouped[name][gpuid] = p

    # Confirm consistency
    for j, (p, ps) in enumerate(viewitems(grouped)):
        assert \
            len(ps) == len(devices), \
            "Param {} does not have value for each device (only {}: {})".format(
                p, len(ps), ps,
            )
        # Ensure ordering
        if (ps[devices[0]] != params[j]):
            log.error("Params: {}".format(params))
            log.error("Grouped: {}".format(list(viewkeys(grouped))))
            assert ps[devices[0]] == params[j], \
                "Incorrect ordering: {}".format(ps)

    return grouped
Example #44
    def execute(self, func, line, cell, magic_cls):
        formatter = DollarFormatter()
        cell = formatter.vformat(cell, args=[],
                                 kwargs=magic_cls.shell.user_ns.copy())
        _, args = self.arguments(func, line)
        result = relational.query(text_to_native_str(cell))
        if args.result:
            magic_cls.shell.user_ns[args.result] = result
        else:
            result = list(result)
            table = Table()
            if result:
                table.append(list(viewkeys(result[0])))
            for row in result:
                table.append(list(viewvalues(row)))
            return table
Example #45
def encode_basic_properties(body_size, props):
    pieces = [''] * 14
    flags = 0
    enc = ENCODE_PROPS_BASIC

    for key in BASIC_PROPS_SET & set(futils.viewkeys(props)):
        i, f, fun = enc[key]
        flags |= f
        pieces[i] = fun(props[key])

    return (0x02,
            join_as_bytes((
                pack('!HHQH', CLASS_BASIC, 0, body_size, flags),
                join_as_bytes(pieces),
            )))
Example #46
def modAminoacidsFromLabelInfo(labelDescriptor):
    """Returns a set of all amino acids and termini which can bear a label, as
    described in "labelDescriptor".

    :param labelDescriptor: :class:`LabelDescriptor` describes the label setup
        of an experiment

    :returns: #TODO: docstring
    """
    modAminoacids = set()
    for labelStateEntry in viewvalues(labelDescriptor.labels):
        for labelPositionEntry in viewkeys(labelStateEntry['aminoAcidLabels']):
            for modAminoacid in aux.toList(labelPositionEntry):
                if modAminoacid != '':
                    modAminoacids.add(modAminoacid)
    return modAminoacids
Example #47
    def __mul__(self, other):

        if len(self) <= len(other):
            smaller, larger = self._d, other._d
        else:
            smaller, larger = other._d, self._d

        # it's meaningfully faster to check in the key dictview
        # of 'larger' than in the dict directly
        larger_keys = viewkeys(larger)

        common = {k: v * larger[k]
                  for k, v in viewitems(smaller)
                  if k in larger_keys}

        return Counter(common)
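A usage sketch of the same intersection trick over plain dicts (hypothetical counts); membership tests against viewkeys(larger) avoid building an intermediate set:

from collections import Counter
from future.utils import viewitems, viewkeys

a = {"x": 2, "y": 3}
b = {"y": 4, "z": 5}
smaller, larger = (a, b) if len(a) <= len(b) else (b, a)
larger_keys = viewkeys(larger)
common = Counter({k: v * larger[k] for k, v in viewitems(smaller) if k in larger_keys})
assert common == Counter({"y": 12})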
Example #49
def print_replaced_attributes(replaced, ignore=("id",), extra=tuple(),
                              names=None):
    """Print attributes diff"""
    names = names or {}
    for (removed, added) in replaced:
        print("  Name: {}".format(removed.name))
        output = []
        for key in viewkeys(removed.to_dict(ignore=ignore, extra=extra)):
            removed_attr = getattr(removed, key)
            added_attr = getattr(added, key)
            if removed_attr != added_attr:
                output.append("    {} changed from {} to {}".format(
                    names.get(key, key.capitalize().replace("_", " ")),
                    removed_attr or "<None>", added_attr or "<None>"))
        print("\n".join(output))
        print()
Example #50
    def _fix_dependencies(self):
        """Propagate dependencies, removing missing nodes"""
        created = self.created
        synonyms = self.synonyms
        arriving_arrows = self.arriving_arrows
        departing_arrows = self.departing_arrows

        removed = (
            set(viewvalues(self.variables))
            - created
            - set(viewkeys(synonyms))
        )
        for variable in removed:
            variable_is_box = "box--" in variable.name
            for source, typ_sv in viewitems(arriving_arrows[variable]):
                if (variable_is_box and "box--" in source.name and
                        not self.config.show_blackbox_dependencies):
                    continue
                for target, typ_vt in viewitems(departing_arrows[variable]):
                    if variable_is_box and source.type == target.type == "arg":
                        continue
                    typ = typ_sv or typ_vt
                    if not typ and not variable_is_box:
                        typ = "dashed"

                    #del arriving_arrows[target][variable]
                    #del departing_arrows[variable][target]
                    departing_arrows[source][target] = typ
                    arriving_arrows[target][source] = typ
            del arriving_arrows[variable]
            for target, typ_vt in viewitems(departing_arrows[variable]):
                if (variable_is_box and "box--" in target.name and
                        not self.config.show_blackbox_dependencies):
                    continue
                for source, typ_sv in viewitems(arriving_arrows[variable]):
                    if variable_is_box and source.type == target.type == "arg":
                        continue
                    typ = typ_sv or typ_vt
                    if not typ and not variable_is_box:
                        typ = "dashed"
                    #del arriving_arrows[variable][source]
                    #del departing_arrows[source][variable]
                    departing_arrows[source][target] = typ
                    arriving_arrows[target][source] = typ
            del departing_arrows[variable]
Example #51
def getContGroupArrays(arrays, groupPositions, arrayKeys=None):
    """Convinience function to generate a subset of arrays from specified array
    positions.

    :param arrays: a dictionary containing ``numpy.arrays``
    :param groupPositions: arrays positions that should be included in the
        subset of arrays
    :param arrayKeys: a list of "arrays" keys that should be included in the
        subset of arrays, if None all keys are selected

    :returns: a dictionary containing ``numpy.arrays``
    """
    if arrayKeys is None:
        arrayKeys = list(viewkeys(arrays))
    matchingArrays = dict()
    for key in arrayKeys:
        matchingArrays[key] = arrays[key][groupPositions]
    return matchingArrays
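A short usage sketch with toy numpy arrays, using getContGroupArrays as defined above; positions [0, 2] are kept for every key:

import numpy

arrays = {"mz": numpy.array([100.0, 200.0, 300.0]),
          "rt": numpy.array([1.0, 2.0, 3.0])}
subset = getContGroupArrays(arrays, [0, 2])
# subset["mz"] -> array([100., 300.]), subset["rt"] -> array([1., 3.])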
Example #52
    def set_classes_default(mcs, attr, value, instances=False, model="*"):
        """Set DEFAULT attribute for Model classes that match model filter

        Arguments:
        attr -- attribute name
        value -- new attribute value

        Keyword arguments:
        instances -- update instances too (default=False)
        model -- filter model (default="*")
        """
        if model == "*":
            for name in viewkeys(mcs.__classes__):
                mcs.set_class_default(name, attr, value, instances=instances)
        else:
            mcs.set_class_default(model, attr, value, instances=instances)
Example #53
def _findSamesetProteins(protToPeps, proteins=None):
    """Find proteins that are mapped to an identical set of peptides.

    :param protToPeps: dict, for each protein (=key) contains a set of
        associated peptides (=value). For example, {protein: {peptide, ...}, ...}
    :param proteins: iterable, proteins that are tested for having equal
        evidence. If not specified all proteins are tested
    :returns: a list of sorted protein tuples that share equal peptide evidence
    """
    proteins = viewkeys(protToPeps) if proteins is None else proteins

    equalEvidence = ddict(set)
    for protein in proteins:
        peptides = protToPeps[protein]
        equalEvidence[tuple(sorted(peptides))].add(protein)
    equalProteins = list()
    for proteins in viewvalues(equalEvidence):
        if len(proteins) > 1:
            equalProteins.append(tuple(sorted(proteins)))
    return equalProteins
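A toy usage sketch, using _findSamesetProteins as defined above: P1 and P2 map to the same peptide set, so they are returned as one sorted tuple:

protToPeps = {"P1": {"pepA", "pepB"}, "P2": {"pepA", "pepB"}, "P3": {"pepC"}}
assert _findSamesetProteins(protToPeps) == [("P1", "P2")]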
Example #54
def neighborhood1(graph1, graph2, mapping, cmp_node):
    """First neighborhood of VND. Add missing combinations"""
    tried = set()

    def add_to_mapping(to_add, new_mapping, swapped):
        """Add combination to mapping"""
        nodes1, nodes2 = graph1["hnodes"], graph2["hnodes"]
        added = []
        for node_id1, node_id2 in to_add:
            if swapped:
                node_id1, node_id2 = node_id2, node_id1
            node1, node2 = nodes1[node_id1], nodes2[node_id2]
            if cmp_node(node1, node2):
                added.append((node_id1, node_id2))
                new_mapping[node1] = node2
        return tuple(added)

    not_mapped1 = (graph1["node_indexes"] -
                   {n["index"] for n in viewkeys(mapping)})
    not_mapped2 = (graph2["node_indexes"] -
                   {n["index"] for n in viewvalues(mapping)})
    swapped = False
    if len(not_mapped2) > len(not_mapped1):
        swapped = True
        not_mapped1, not_mapped2 = not_mapped2, not_mapped1

    possibilities = [
        list(zip(x, not_mapped2))
        for x in itertools.permutations(not_mapped1, len(not_mapped2))]

    for full_map in possibilities:
        for i in range(1, len(not_mapped2) + 1):
            for to_add in itertools.combinations(full_map, i):
                if to_add in tried:
                    continue
                new_mapping = copy(mapping)
                to_add = add_to_mapping(to_add, new_mapping, swapped)
                if to_add in tried:
                    continue
                tried.add(to_add)
                yield new_mapping
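
A hedged sketch of driving this generator; g1, g2 and cmp_name are assumptions standing in for graphs prepared by a prepare_graph-style helper and a real node comparison function:

def cmp_name(node1, node2):
    # Illustrative comparison: treat nodes as matching when names agree
    return node1["name"] == node2["name"]

best = None
for candidate in neighborhood1(g1, g2, {}, cmp_name):
    # Keep the largest mapping found so far (a stand-in scoring rule)
    if best is None or len(candidate) > len(best):
        best = candidate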
Example no. 55
0
    def from_dict(cls, dictionary):
        """Create a ``TabularMSA`` from a ``dict``.

        Parameters
        ----------
        dictionary : dict
            Dictionary mapping keys to alphabet-aware scikit-bio sequence
            objects. The ``TabularMSA`` object will have its index labels set
            to the keys in the dictionary.

        Returns
        -------
        TabularMSA
            ``TabularMSA`` object constructed from the keys and sequences in
            `dictionary`.

        See Also
        --------
        to_dict
        sort

        Notes
        -----
        The order of sequences and index labels in the resulting ``TabularMSA``
        object is arbitrary. Use ``TabularMSA.sort`` to set a different order.

        Examples
        --------
        >>> from skbio import DNA, TabularMSA
        >>> seqs = {'a': DNA('ACGT'), 'b': DNA('A--T')}
        >>> msa = TabularMSA.from_dict(seqs)

        """
        # Python 2 and 3 guarantee same order of iteration as long as no
        # modifications are made to the dictionary between calls:
        #     https://docs.python.org/2/library/stdtypes.html#dict.items
        #     https://docs.python.org/3/library/stdtypes.html#
        #         dictionary-view-objects
        return cls(viewvalues(dictionary), index=viewkeys(dictionary))
Example no. 56
0
    def __getitem__(self, item):
        """
        item can be a tuple or list of ints or strings, or a single
        int or string. String item is a nested field name, e.g., "a", "a:b",
        "a:b:c". Int item is the index of a field at the first level of the
        Struct.
        """
        if isinstance(item, (list, tuple)):
            keys = list(viewkeys(self.fields))
            return Struct(
                *[
                    (keys[k] if isinstance(k, int) else k, self[k])
                    for k in item
                ]
            )
        elif isinstance(item, int):
            return next(islice(viewvalues(self.fields), item, None))
        else:
            field = self._get_field_by_nested_name(item)
            if field is None:
                raise KeyError('field "%s" not found' % (item,))
            return field
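
A hedged indexing sketch, assuming the caffe2-style schema module this Struct appears to come from:

from caffe2.python import schema

s = schema.Struct(('a', schema.Scalar()), ('b', schema.Scalar()))
s['a']       # single field by name
s[0]         # single field by position
s[['a', 1]]  # sub-Struct holding fields 'a' and 'b' (mixed name/index)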
Example no. 57
0
def loadBinaryItemContainer(zippedfile, jsonHook):
    """Imports binaryItems from a zipfile generated by
    :func:`writeBinaryItemContainer`.

    :param zippedfile: can be either a path to a file (a string) or a
        file-like object
    :param jsonHook: a custom decoding function for JSON formatted strings of
        the binaryItems stored in the zipfile.

    :returns: a dictionary containing binaryItems
        ``{binaryItem.id: binaryItem, ... }``
    """
    binaryItemContainer = dict()
    with zipfile.ZipFile(zippedfile, 'r') as containerZip:
        # Convert the zipfile data into a str object; this is necessary since
        # containerZip.read() returns a bytes object.
        metadataText = io.TextIOWrapper(containerZip.open('metadata'),
                                        encoding='utf-8').read()
        allMetadata = json.loads(metadataText, object_hook=jsonHook)
        metadataIndex = [str(i) for i in
                         sorted(int(i) for i in viewkeys(allMetadata))]
        binarydataFile = containerZip.open('binarydata')
        for index in metadataIndex:
            binaryItem = allMetadata[index][0]
            for binaryMetadata in allMetadata[index][1]:
                arrayKey = binaryMetadata['arrayKey']
                rawdata = binarydataFile.read(binaryMetadata['end'] -
                                              binaryMetadata['start'])
                array = _arrayFromBytes(rawdata, binaryMetadata)
                binaryItem.arrays[arrayKey] = array
            binaryItemContainer[binaryItem.id] = binaryItem
    return binaryItemContainer
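
Loading a container might look like this, assuming a file previously produced by the matching writeBinaryItemContainer function (the path and the identity hook are illustrative):

# 'container.zip' is an assumed path; object_hook=None means plain JSON decoding
binaryItems = loadBinaryItemContainer('container.zip', jsonHook=None)
for itemId, item in viewitems(binaryItems):
    print(itemId, sorted(viewkeys(item.arrays)))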
Example no. 58
0
    def __eq__(self, other):
        log.debug("Testing equality")
        if type(self) != type(other):
            log.debug("Typecheck failed")
            return NotImplemented
        if viewkeys(self._elem_names) != viewkeys(other._elem_names):
            log.debug("Keys different: self only: {}, other only: {}".format(
                viewkeys(self._elem_names) - viewkeys(other._elem_names),
                viewkeys(other._elem_names) - viewkeys(self._elem_names)))
            return False
        if (np.all(np.isnan(self._coordinates)) and
                np.all(np.isnan(other._coordinates))):
            log.debug("True: All is NAN")
            return True
        for key in self:
            if not np.allclose(self[key], other[key]):
                log.debug("Values for key {} different: {}!={}".format(
                    key, self[key], other[key]))
                return False
        log.debug("Equal!")
        return True
Example no. 59
0
    def position_entropies(self, base=None,
                           nan_on_non_standard_chars=True):
        """Return Shannon entropy of positions in Alignment

        Parameters
        ----------
        base : float, optional
            log base for entropy calculation. If not passed, default will be e
            (i.e., natural log will be computed).
        nan_on_non_standard_chars : bool, optional
            if True, the entropy at positions containing characters outside of
            the first sequence's `iupac_standard_characters` will be `np.nan`.
            This is the default behavior, and is useful because it is not
            clear how a gap or degenerate character should contribute to a
            positional entropy. This issue was described in [1]_.

        Returns
        -------
        list
            List of floats of Shannon entropy at `Alignment` positions. Shannon
            entropy is defined in [2]_.

        See Also
        --------
        position_counters
        position_frequencies

        References
        ----------
        .. [1] Identifying DNA and protein patterns with statistically
           significant alignments of multiple sequences.
           Hertz GZ, Stormo GD.
           Bioinformatics. 1999 Jul-Aug;15(7-8):563-77.
        .. [2] A Mathematical Theory of Communication
           CE Shannon
           The Bell System Technical Journal (1948).

        Examples
        --------
        >>> from skbio.core.alignment import Alignment
        >>> from skbio.core.sequence import DNA
        >>> sequences = [DNA('AC--', id="seq1"),
        ...              DNA('AT-C', id="seq2"),
        ...              DNA('TT-C', id="seq3")]
        >>> a1 = Alignment(sequences)
        >>> print(a1.position_entropies())
        [0.63651416829481278, 0.63651416829481278, nan, nan]

        """
        result = []
        # handle empty Alignment case
        if self.is_empty():
            return result

        iupac_standard_characters = self[0].iupac_standard_characters()
        for f in self.position_frequencies():
            if (nan_on_non_standard_chars and
                    len(viewkeys(f) - iupac_standard_characters) > 0):
                result.append(np.nan)
            else:
                result.append(entropy(list(f.values()), base=base))
        return result