Example #1
    def __init__(self):
        self.timestamp = None
        self.trx_per_resource = defaultdict(int)
        self.trx_per_user = defaultdict(int)
        self.trx_per_method = defaultdict(int)
        self.trx_per_status = defaultdict(int)
        self.trx_per_sec = 0.0
Example #2
    def reset(self, timestamp):
        logger.debug("Resetting basic data")
        self.timestamp = timestamp
        self.trx_per_resource = defaultdict(int)
        self.trx_per_user = defaultdict(int)
        self.trx_per_method = defaultdict(int)
        self.trx_per_status = defaultdict(int)
        self.trx_per_sec = 0.0
Example #3
def _iter_events_assemble(waveform: Dict[Any, Union[Stream, Trace]]):
    """ given a dict of the form {event_id: waveforms}, create a new dict of
    the structure {sampling_rage: {event_id: waveforms}}"""
    out = defaultdict(lambda: defaultdict(obspy.Stream))
    for item, value in waveform.items():
        if isinstance(value, Trace):
            value = Stream(traces=[value])
        for tr in value:
            sr = int(np.round(tr.stats.sampling_rate))
            out[sr][item] += tr
    return dict(out)
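A minimal usage sketch for the function above, assuming obspy and numpy are available; the event id and sampling rates are made up for illustration.

import numpy as np
import obspy

# Two traces with different sampling rates attached to one hypothetical event.
tr_40hz = obspy.Trace(data=np.zeros(100), header={"sampling_rate": 40.0})
tr_100hz = obspy.Trace(data=np.zeros(100), header={"sampling_rate": 100.0})
waveforms = {"event_1": obspy.Stream(traces=[tr_40hz, tr_100hz])}

grouped = _iter_events_assemble(waveforms)
# grouped == {40: {"event_1": Stream with the 40 Hz trace},
#             100: {"event_1": Stream with the 100 Hz trace}}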
Example #4
    def __init__(self, name, defaults: Sequence[Tuple[Union[str, Tuple], object]],
                 callables: List[NamedExecutable],
                 seconds=60,
                 starting_moment: datetime.datetime = None,
                 **kwargs):
        super().__init__(name, defaults, _default_factory=lambda: defaultdict(dict), **kwargs)
        self.metric_names: List[str] = [c[0] for c in callables]
        self.seconds = seconds
        # symbol, side -> trade
        self.storage: Dict[Tuple, Deque] = defaultdict(deque)
        self._callables: List[Callable[[List], float]] = [c[1] for c in callables]
        self._from: datetime.datetime = starting_moment
        self._skip_from = False
Example #5
async def material_tree_json(request: web.Request):
    with session_scope() as sess:
        materials: List[Material] = (sess.query(Material).filter(
            Material.enabled.is_(True)).order_by(Material.substance.asc(),
                                                 Material.id.asc()).all())

    material_by_substance = defaultdict(list)
    for material in materials:
        if material.substance:
            material_by_substance[material.substance].append(material)

    tree = {
        'name': 'materials',
        'children': [{
            'name': subst,
            'text': subst,
            'children': [{
                'name': mat.name,
                'size': 120000,
                'img': nginx_url(mat.get_data_path('previews/bmps.png')),
            } for mat in subst_mats if mat.substance]
        } for subst, subst_mats in material_by_substance.items()],
    }

    return web.json_response(tree, dumps=partial(json.dumps, indent=2))
Example #6
    def __init__(self, gamma, alpha, epsilon, epsilon_decay):
        self.epsilon_decay = epsilon_decay
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.n_actions = 4
        self.Q = defaultdict(lambda: np.zeros(4))
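A hedged sketch of how a Q-table initialized this way is commonly used for epsilon-greedy action selection; the helper and the example state below are illustrations, not part of the original class.

import numpy as np
from collections import defaultdict

def epsilon_greedy_action(Q, state, epsilon, n_actions=4):
    # Explore with probability epsilon, otherwise pick the greedy action.
    if np.random.random() < epsilon:
        return np.random.randint(n_actions)
    return int(np.argmax(Q[state]))

Q = defaultdict(lambda: np.zeros(4))
action = epsilon_greedy_action(Q, state=(0, 0), epsilon=0.1)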
Example #7
def alpha(patches):
    # split into runoff return, patch type, flowlength

    # subsets = [[], []]
    # # Filter on global flowlength
    # for patch in patches:
    #     if patch.model.runoff_return:
    #         subsets[0].append(patch)
    #     else:
    #         subsets[1].append(patch)

    subsets = [patches]
    new_subsets = []
    for subset in subsets:
        subset_dict = defaultdict(list)
        for patch in subset:
            subset_dict[patch.type].append(patch)
        new_subsets += subset_dict.values()
    subsets = new_subsets

    new_subsets = []
    for subset in subsets:
        # Sorting patches on FL
        subset.sort(key=lambda x: sum([cell.FL for cell in x.RL + x.BR]))
        # Splitting category in #FL_RESOLUTION equally sized groups of patches
        new_subsets += list(np.array_split(subset, RESOLUTION))
    subsets = new_subsets

    return subsets
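np.array_split is what keeps the RESOLUTION groups roughly equal in size even when the subset length is not evenly divisible; a standalone illustration:

import numpy as np

chunks = np.array_split(list(range(10)), 3)
# [array([0, 1, 2, 3]), array([4, 5, 6]), array([7, 8, 9])]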
Example #8
    def get_top_subjects(self, top):
        '''Select the most frequently occurring subjects.'''
        frequencies = defaultdict(int)

        for sentence in self._sentences:
            for word in sentence:
                if word.part_sentence == PartSentence.Subject:
                    frequencies[word.lemma] = word.frequency

        return sorted(frequencies.items(), key=lambda x: -x[1])[:top]
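A quick standalone illustration of the sort-and-slice pattern used above, with made-up lemma frequencies:

from collections import defaultdict

frequencies = defaultdict(int)
for lemma, freq in [("cat", 3), ("dog", 5), ("bird", 1)]:
    frequencies[lemma] = freq

top_two = sorted(frequencies.items(), key=lambda x: -x[1])[:2]
# top_two == [('dog', 5), ('cat', 3)]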
Example #9
    def out_edges(self):
        out_edges = defaultdict(list)
        out_edge_modifiers = {}
        for f, t, *l in self.ontology.in_edges:

            out_edges[t].append(f)
            out_edge_modifiers[t, f] = l

        self.out_edge_modifiers = out_edge_modifiers

        return out_edges
Example #10
def process_ipynb_output_results(cell_order, outputs):
    keys = list(outputs.keys())
    results = []
    for k in keys[:]:
        # cell order, result order, type
        c, r, t = split_output_name(k)
        if c == cell_order:
            results.append((r, t, outputs[k]))

    dict_results = defaultdict(dict)
    for r, t, o in results:
        dict_results[r][t] = o

    results = []
    for r, ts in dict_results.items():
        if 'htm' in ts:
            results.append(CR(ResultTypes.Stream,
                              r,
                              ts['htm'].decode('UTF-8'),
                              'text/html'))
        elif 'png' in ts:
            results.append(CR(ResultTypes.Image,
                              r,
                              ts['png'],
                              'image/png'))
        elif 'jpg' in ts:
            results.append(CR(ResultTypes.Image,
                              r,
                              ts['jpg'],
                              'image/jpg'))
        elif 'ksh' in ts:
            results.append(CR(ResultTypes.Stream,
                              r,
                              ts['ksh'].decode('UTF-8'),
                              'text/plain'))
        elif 'txt' in ts:
            results.append(CR(ResultTypes.Stream,
                              r,
                              ts['txt'].decode('UTF-8'),
                              'text/plain'))
        elif 'c' in ts:
            results.append(CR(ResultTypes.Stream,
                              r,
                              ts['c'].decode('UTF-8'),
                              'text/plain'))
        elif 'bat' in ts:
            results.append(CR(ResultTypes.Stream,
                              r,
                              ts['bat'].decode('UTF-8'),
                              'text/plain'))
        else:
            logger.error("Unknown result type %s\n%s", repr(ts)[:40], repr(r)[:40])

    return results
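The grouping step above (result order -> {type: output}) can be tried in isolation; the tuples below are made up:

from collections import defaultdict

results = [(0, 'png', b'...'), (0, 'txt', b'hello'), (1, 'txt', b'world')]
dict_results = defaultdict(dict)
for r, t, o in results:
    dict_results[r][t] = o
# dict(dict_results) == {0: {'png': b'...', 'txt': b'hello'}, 1: {'txt': b'world'}}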
Example #11
    def get_context_data(self, **kwargs):
        context = super().get_context_data(**kwargs)
        party_id = kwargs["legacy_slug"]
        party = get_object_or_404(Party, legacy_slug=party_id)

        # Make the party emblems conveniently available in the context too:
        context["emblems"] = party.emblems.all()
        all_post_groups = self.election_data.posts.values_list(
            "group", flat=True).distinct()
        by_post_group = {
            pg: {
                "stats": None,
                "posts_with_memberships": defaultdict(list)
            }
            for pg in all_post_groups
        }
        for membership in (Membership.objects.filter(
                party=party,
                post_election__election=self.election_data,
                role=self.election_data.candidate_membership_role,
        ).select_related().prefetch_related("post", "person")):
            person = membership.person
            post = membership.post
            post_group = post.group
            by_post_group[post_group]["posts_with_memberships"][post].append({
                "membership": membership,
                "person": person,
                "post": post,
            })
        # That'll only find the posts that someone from the party is
        # actually standing for, so add any other posts...
        for post in self.election_data.posts.all():
            post_group = post.group
            post_group_data = by_post_group[post_group]
            posts_with_memberships = post_group_data["posts_with_memberships"]
            posts_with_memberships.setdefault(post, [])
        context["party"] = party
        context["party_name"] = party.name
        for post_group, data in by_post_group.items():
            posts_with_memberships = data["posts_with_memberships"]
            by_post_group[post_group]["stats"] = get_post_group_stats(
                posts_with_memberships)
            data["posts_with_memberships"] = sorted(
                posts_with_memberships.items(), key=lambda t: t[0].label)
        context["candidates_by_post_group"] = sorted(
            [(pg, data)
             for pg, data in by_post_group.items() if pg in all_post_groups],
            key=lambda k: k[0],
        )
        return context
Example #12
def _split_by_sampling_rate(
        st: Union[obspy.Stream, obspy.Trace]) -> Dict[int, obspy.Stream]:
    """given a waveforms, split the waveforms into dicts with unique sampling
    rates"""
    if isinstance(st, obspy.Trace):  # convert to waveforms if trace passed
        st = obspy.Stream(traces=[st])
    # iterate and separate sampling_rates
    out = defaultdict(obspy.Stream)
    for tr in st:
        sr = int(np.round(tr.stats.sampling_rate))
        out[sr] += tr
    return dict(out)
Example #13
    def calculate_frequencies(self):
        """Вычисляется частота вхождения каждого подлежащего"""
        frequencies = defaultdict(int)

        for sentence in self._sentences:
            for word in sentence:
                if word.part_sentence == PartSentence.Subject:
                    frequencies[word.lemma] += 1

        for sentence in self._sentences:
            for word in sentence:
                word.frequency = frequencies.get(word.lemma)
Example #14
    def train(self, train_set):
        num_genre = defaultdict(lambda: 0)

        for t_set in train_set:
            for genre in t_set["genre"]:
                num_genre[genre] += 1 / len(t_set["genre"])

        self.genre_list = num_genre
        print(num_genre)

        word_counts = count_words(train_set)
        self.word_probs = word_probabilities(word_counts, num_genre, self.k)
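A standalone illustration of the fractional genre counting above, using made-up training entries: an item tagged with two genres contributes 1/2 to each.

from collections import defaultdict

num_genre = defaultdict(lambda: 0)
for t_set in [{"genre": ["rock", "pop"]}, {"genre": ["rock"]}]:
    for genre in t_set["genre"]:
        num_genre[genre] += 1 / len(t_set["genre"])
# dict(num_genre) == {'rock': 1.5, 'pop': 0.5}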
Example #15
    def __init__(self, *, loop, address: str, port: int):
        self.loop = loop
        self.address = address
        self.port = port
        self.app = web.Application(loop=loop)
        self.app.router.add_routes(
            [web.get('/api/v1/address', self.on_address),
             web.get('/api/v1/payments/{token_address}/{partner_address}', self.on_payment_info)])
        self.handler = None
        self.server = None

        # mapping (token_address, partner_address) -> {nonce1: amount1, nonce2: amount2, ...}:
        self.payments = defaultdict(dict)
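A toy illustration of the payments mapping described in the comment above; the token address, partner address, nonces and amounts are made up:

from collections import defaultdict

payments = defaultdict(dict)
payments[('0xToken', '0xPartner')][1] = 100   # nonce 1 -> amount 100
payments[('0xToken', '0xPartner')][2] = 250   # nonce 2 -> amount 250
# payments[('0xToken', '0xPartner')] == {1: 100, 2: 250}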
Example #16
    def __init__(self, text_path, dict_path, synonymous_dict_path):
        self._text_path = text_path
        self._dict_path = dict_path
        self._synonymous_dict_path = synonymous_dict_path
        '''
        person_counter is a counter that tallies how many times each person appears, e.g. {'a': 1, 'b': 2}.
        person_per_paragraph holds the people that appear in each paragraph, e.g. [['a', 'b'], []].
        relationships stores the relations between people: the key is person A, the value is a dict mapping person B to a weight.
        '''
        self._person_counter = defaultdict(int)
        self._person_per_paragraph = []
        self._relationships = {}
        self._synonymous_dict = {}
Example #17
def group_new_test_indices_by_patient(ds):
    tids = ds.field_by_name('id')
    pids = ds.field_by_name('patient_id')
    cats = ds.field_by_name('created_at')
    edates = ds.field_by_name('date_taken_specific')
    edate_los = ds.field_by_name('date_taken_between_start')
    edate_his = ds.field_by_name('date_taken_between_end')
    print('row count:', ds.row_count())

    patients = defaultdict(TestIndices)
    for i_r in range(ds.row_count()):
        patients[pids[i_r]].add(i_r)
    return patients
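TestIndices is not shown in these snippets; judging from how it is used here and in the deduplication example further down, a minimal stand-in only needs an indices list and an add method (an assumption, not the original class):

class TestIndices:
    def __init__(self):
        self.indices = []

    def add(self, index):
        self.indices.append(index)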
Example #18
def scan_folders_for_files(folders):
    """
    scan_folders_for_files loops through the specified folders and collects their files
    :param folders: a list of paths to folders
    :return: files: a dictionary mapping each file stem to the list of paths that lead to it
    """
    files = defaultdict(list)
    for folder in folders:
        if not folder.is_dir():
            io.warn("Folder %s doesn't exist" % folder)

        for file in list(folder.glob('*.*')):
            files[file.stem].append(file)
    return files
Example #19
    def __init__(self, project_name):

        self.project_name = project_name
        self._was_warned = defaultdict(lambda: False)

        opts = self._get_database(opts=None)
        opts['connect'] = True
        if 'type' in opts:
            opts.pop('type')

        logger.debug(f'establishing connection to the database')
        self.client = MongoClient(**opts)

        logger.debug(f'connection established {self.client}')
        logger.debug(f'connected to the mongo server {self}')
Example #20
    def in_edges(self):
        # workaround for networkx bug when some edges have labels and some have not:
        in_edges = defaultdict(list)
        edge_modifiers = {}
        for f, t, *l in self.ontology.in_edges:

            # look out for is_not labels!
            for relation in l:
                assert 'not' not in relation

            in_edges[t].append(f)
            edge_modifiers[t, f] = l

        self.edge_modifiers = edge_modifiers

        return in_edges
Example #21
def get_patients_with_old_format_tests(a_ds):
    apids = a_ds.field_by_name('patient_id')
    ahcts = a_ds.field_by_name('had_covid_test')
    atcps = a_ds.field_by_name('tested_covid_positive')
    auats = a_ds.field_by_name('updated_at')
    print('row count:', a_ds.row_count())

    apatients = defaultdict(OldFormatTestSummary)
    for i_r in range(a_ds.row_count()):
        if ahcts[i_r] == 'True' or atcps[i_r] in ('no', 'yes'):
            apatients[apids[i_r]].add(i_r, atcps[i_r])

    apatient_test_count = 0
    for k, v in apatients.items():
        if len(v.indices) > 0:
            apatient_test_count += 1

    return apatients
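OldFormatTestSummary is likewise not shown; based on the add(index, result) call above, a minimal stand-in might look like this (an assumption about the original class):

class OldFormatTestSummary:
    def __init__(self):
        self.indices = []
        self.results = []

    def add(self, index, result):
        self.indices.append(index)
        self.results.append(result)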
Example #22
def competition(patches):
    # split into runoff return, patch type, slope position

    # subsets = [[], []]
    # # Filter on global flowlength
    # for patch in patches:
    #     if patch.model.runoff_return:
    #         subsets[0].append(patch)
    #     else:
    #         subsets[1].append(patch)

    subsets = [patches]

    new_subsets = []
    for subset in subsets:
        # Sorting patches on FL
        subset.sort(key=lambda x: sum([cell.FL for cell in x.RL + x.BR]))
        # Splitting category in #FL_RESOLUTION equally sized groups of patches
        new_subsets += np.array_split(subset, RESOLUTION)
    subsets = new_subsets

    new_subsets = []
    for subset in subsets:
        # Sorting patches on FL
        subset = list(subset)
        subset.sort(key=lambda x: sum([cell.pos[0] for cell in x.RL + x.BR]))
        # Splitting category in #FL_RESOLUTION equally sized groups of patches
        new_subsets += np.array_split(subset, RESOLUTION)
    subsets = new_subsets

    intra_r, intra_b, inter = [], [], []
    for subset in subsets:
        subset_dict = defaultdict(list)
        for patch in subset:
            # Only sort on the type, not the size!!
            subset_dict[patch.type[0]].append(patch)
        intra_r.append(subset_dict['R'])
        intra_b.append(subset_dict['B'])
        inter.append(subset_dict['M'])

    return intra_r, intra_b, inter
Example #23
    def create_from_tokens(
            cls,
            tokens: Iterable,
            maximum_vocabulary: Optional[int] = None) -> "TokenCodec":
        """
        Create a codec from a sequence of tokens. The vocabulary will consist of all the tokens. Token indexes are
        ordered by frequency then token order.

        :param tokens: sequence of token from which to build the codec
        :param maximum_vocabulary: optionally clip the vocabulary to this many of the most frequent tokens
        :return: a codec
        """
        token_count = defaultdict(int)
        for token in tokens:
            token = Token.create(token)
            token_count[token] += 1
        index_to_token = [
            token for _, token in sorted(
                ((count, token) for token, count in token_count.items()),
                key=lambda t: (-t[0], t[1]))[:maximum_vocabulary]
        ]
        return cls(index_to_token)
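The frequency-then-token ordering can be exercised on its own, with plain strings standing in for Token objects:

from collections import defaultdict

token_count = defaultdict(int)
for token in ["b", "a", "b", "c", "a", "b"]:
    token_count[token] += 1

index_to_token = [
    token for _, token in sorted(
        ((count, token) for token, count in token_count.items()),
        key=lambda t: (-t[0], t[1]))[:2]
]
# index_to_token == ['b', 'a']  (most frequent first; ties broken by token order)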
Example #24
def filter_duplicate_new_tests(ds,
                               patients,
                               threshold_for_diagnostic_print=1000000):
    tids = ds.field_by_name('id')
    pids = ds.field_by_name('patient_id')
    cats = ds.field_by_name('created_at')
    edates = ds.field_by_name('date_taken_specific')
    edate_los = ds.field_by_name('date_taken_between_start')
    edate_his = ds.field_by_name('date_taken_between_end')

    cleaned_patients = defaultdict(TestIndices)
    for p in patients.items():
        # print(p[0], len(p[1].indices))
        test_dates = set()
        for i_r in reversed(p[1].indices):
            test_dates.add((edates[i_r], edate_los[i_r], edate_his[i_r]))
        if len(test_dates) == 1:
            istart = p[1].indices[-1]
            # utils.print_diagnostic_row(f"{istart}", ds, istart, ds.names_)
            cleaned_patients[p[0]].add(istart)
        else:
            cleaned_entries = dict()
            for t in test_dates:
                cleaned_entries[t] = list()
            for i_r in reversed(p[1].indices):
                cleaned_entries[(edates[i_r], edate_los[i_r],
                                 edate_his[i_r])].append(i_r)

            if len(test_dates) > threshold_for_diagnostic_print:
                print(p[0])
            for e in sorted(cleaned_entries.items(), key=lambda x: x[0]):
                last_index = e[1][0]
                if len(test_dates) > threshold_for_diagnostic_print:
                    utils.print_diagnostic_row(f"{p[0]}/{last_index}", ds,
                                               last_index, ds.names_)
                cleaned_patients[p[0]].add(last_index)

    return cleaned_patients
Example #25
class StatsHolder:
    log: 'Logger'
    lastUpdate: datetime
    # user and game events:
    # User logs in
    # User logs out
    # User join game
    # User leaves game
    connectTimes: List[float] = [0]
    userEvents = WeakList()
    userEventsView: Dict[str, List[UserEvent]] = defaultdict(list)

    def __init__(self, main: 'WesBot', log) -> None:
        self.main = main
        self.log = log
        self.lastUpdate = datetime.datetime.now()

    def addConnectTime(self) -> None:
        if self.connectTimes[0] < 0.0001:
            self.connectTimes.clear()
        self.connectTimes.append(time.time())

    def getTimeSinceFirstConnect(self) -> str:
        return str(
            datetime.timedelta(seconds=time.time() - self.connectTimes[0]))

    def getTimeSinceLastConnect(self):
        return str(
            datetime.timedelta(seconds=time.time() - self.connectTimes[-1]))

    def onUserRemove(self, name, comment=""):
        if comment is None:
            return
        e = UserEvent(time.time(), "-", name, comment)
        self.userEventsView[name].append(e)
        self.userEvents.append(e)

    def onUserAdd(self, name, comment=""):
        if comment is None:
            return
        e = UserEvent(time.time(), "+", name, comment)
        self.userEventsView[name].append(e)
        self.userEvents.append(e)

    def getUserStats(self, name: str) -> str:
        name = name.strip()
        self.loadUserEvents(name)
        if name not in self.userEventsView:
            return "No stats found for '{}'".format(name)
        d = self.userEventsView[name]
        totalUptime = 0
        lastUptime = 0
        firstJoin = 0
        latestJoin = 0
        lastSeen = 0
        count = 0
        # 0 -> user is offline
        # 1 -> user is online
        # 2 -> user is added, and immediately deleted
        for e in d:
            if e.event == "+":
                count += 1
                if count == 2:
                    continue
                if firstJoin == 0:
                    firstJoin = e.time
                if count == 1:
                    latestJoin = e.time
            if e.event == "-":
                count -= 1
                if e.comment == "onQuit":  # Everyone is considered offline while bot is offline
                    count = 0
                if count == 1:
                    continue
                if count == 0:
                    lastUptime = e.time - latestJoin
                    totalUptime += lastUptime
                    lastSeen = e.time
        if count == 1:
            lastSeen = time.time()
            lastUptime = lastSeen - latestJoin
            totalUptime += lastUptime
        return "{} has been online for {}, in last session {}. Last seen online: {}".format(
            name, str(datetime.timedelta(seconds=totalUptime)),
            str(datetime.timedelta(seconds=lastUptime)),
            datetime.datetime.fromtimestamp(lastSeen).strftime(
                '%d.%m %H:%M:%S'))

    def getLastSeenTime(self, name: str) -> datetime:
        lastSeen = 0
        name = name.strip()
        if name not in self.userEventsView:
            self.log.debug("No stats for user {}".format(name))
            return datetime.datetime.fromtimestamp(lastSeen)
        d = self.userEventsView[name]
        # TODO save user updates with different comment than login and logout
        # Last event time, and for online users current time
        if len(d) > 0:
            lastSeen = d[-1].time
        if name in self.main.lobby.users.getOnlineNames():
            lastSeen = time.time()
        fromtimestamp = datetime.datetime.fromtimestamp(lastSeen)
        self.log.log(
            4, "Found last seen time {} for {}".format(fromtimestamp, name))
        return fromtimestamp

    def onQuit(self):
        self.log.debug("Removing all online users on quit")
        self.logStats()
        u: User
        for u in self.main.lobby.users.getOnlineUsers():
            self.onUserRemove(u.name, "onQuit")
        self.saveUsers()

    def tick(self):
        now: datetime = datetime.datetime.now()
        if now - self.lastUpdate > datetime.timedelta(hours=1):
            self.logStats()
            self.deleteOldData(now)
            self.lastUpdate = now

    def logStats(self):
        self.log.info("Game stats: {}, User stats: {}".format(
            self.main.lobby.games.getStats(),
            self.main.lobby.users.getStats()))

    def deleteOldData(
        self,
        now: datetime,
        deletionTime: datetime.timedelta = datetime.timedelta(hours=2)):
        self.log.debug("userEvents count before archive {}".format(
            len(self.userEvents)))

        self.saveUsers()
        deletableNames = set()
        for name in self.userEventsView:
            last_seen_time: datetime = self.getLastSeenTime(name)
            time_since_seen: datetime.timedelta = now - last_seen_time
            self.log.log(
                5, "User {} time since seen {}, time seen {}".format(
                    name, time_since_seen, last_seen_time))
            if time_since_seen > deletionTime:
                deletableNames.add(name)
        for name in deletableNames:
            del self.userEventsView[name]
            self.log.debug("Deleted {}".format(name))
        self.log.debug("userEvents count after archive {}".format(
            len(self.userEvents)))

    def saveUsers(self):
        for name in self.userEventsView:
            unsavedSince = 0

            filename = "user_events/{}/{}.log".format(name, name)
            os.makedirs(os.path.dirname(filename), exist_ok=True)
            if os.path.isfile(filename):
                with open(filename, "r", encoding="utf8") as f:
                    lines = f.readlines()
                    if len(lines) > 0:
                        e = UserEvent.fromJSON(lines[-1])
                        unsavedSince = e.time

            with open(filename, "a", encoding="utf8") as f:
                for e in self.userEventsView[name]:
                    if e.time > unsavedSince + 0.0001:
                        f.write(e.toJSON() + "\n")
            # TODO new file when it is too large
            # if os.path.getsize(filename) > 1 * 1000 * 1000:
            #     pass
        self.main.log.debug("Users saved")

    def loadUserEvents(self, name):
        filename = "user_events/{}/{}.log".format(name, name)
        if not os.path.isfile(filename):
            return
        self.log.debug("Loading user events for {}".format(name))
        loadedEvents = []
        firstAvailableTime = time.time()
        if len(self.userEventsView[name]) > 0:
            firstAvailableTime = self.userEventsView[name][0].time
        with open(filename, "r", encoding="utf8") as f:
            for line in f:
                e = UserEvent.fromJSON(line)
                if e.time < firstAvailableTime:
                    loadedEvents.append(e)
        self.log.debug("Loaded {} user events for {}".format(
            len(loadedEvents), name))
        if len(loadedEvents) == 0:
            return
        self.userEventsView[name].extend(loadedEvents)
        self.log.debug("First extend")
        self.userEventsView[name].sort(key=lambda e: e.time)
        self.log.debug("First sort")
        self.userEvents.extend(loadedEvents)
        self.log.debug("Second extend")
        self.userEvents.sort(key=lambda e: e.time)
        self.log.debug("Second sort")
Example #26
    def __init__(self):
        self.params = None
        self.model = None
        self.epoch_counter = defaultdict(int)
        self.batch_counter = defaultdict(int)
Example #27
    def __init__(self, average_stats_pqueue: PriorityQueue, time_period):
        self.__avg_stats_queue = average_stats_pqueue
        self.__time_period = time_period

        self.__trx_per_sec = defaultdict(int)
        self.__current_second = None
Example #28
def convert_to_layer_nodes(root):
    """
    At each level in the SPN rooted in the 'root' node, model all the nodes
    as a single layer-node.

    Args:
        root (Node): The root of the SPN graph.

    Returns:
        root (Node): The root of the SPN graph, with each layer modelled as a
                     single layer-node.
    """

    parents = defaultdict(list)
    depths = defaultdict(list)
    node_to_depth = OrderedDict()
    node_to_depth[root] = 1

    def get_parents(node):
        # Add to Parents dict
        if node.is_op:
            for i in node.inputs:
                if (i and  # Input not empty
                        not (i.is_param or i.is_var)):
                    parents[i.node].append(node)
                    node_to_depth[i.node] = node_to_depth[node] + 1

    def permute_inputs(input_values, input_sizes):
        # For a given list of inputs and their corresponding sizes, create a
        # nested-list of (input, index) pairs.
        # E.g: input_values = [(A, [2, 5]), (B, None)]
        #      input_sizes = [2, 3]
        #      inputs = [[('A', 2), ('A', 5)],
        #                [('B', 0), ('B', 1), ('B', 2)]]
        inputs = [
            list(product([inp.node], inp.indices)) if inp and inp.indices else
            list(product([inp.node], list(range(inp_size))))
            for inp, inp_size in zip(input_values, input_sizes)
        ]

        # For a given nested-list of (input, index) pairs, permute over the inputs
        # E.g: permuted_inputs = [('A', 2), ('B', 0),
        #                         ('A', 2), ('B', 1),
        #                         ('A', 2), ('B', 2),
        #                         ('A', 5), ('B', 0),
        #                         ('A', 5), ('B', 1),
        #                         ('A', 5), ('B', 2)]
        permuted_inputs = list(product(*[inps for inps in inputs]))
        return list(chain(*permuted_inputs))

    # Create a parents dictionary of the SPN graph
    traverse_graph(root, fun=get_parents, skip_params=True)

    # Create a depth dictionary of the SPN graph
    for key, value in node_to_depth.items():
        depths[value].append(key)
    spn_depth = len(depths)

    # Iterate through each depth of the SPN, starting from the deepest layer,
    # moving up to the root node
    for depth in range(spn_depth, 1, -1):
        if isinstance(depths[depth][0], (Sum, ParallelSums)):  # A Sums Layer
            # Create a default SumsLayer node
            with tf.name_scope("Layer%s" % depth):
                sums_layer = SumsLayer(name="SumsLayer-%s.%s" % (depth, 1))
            # Initialize a counter for keeping track of number of sums
            # modelled in the layer node
            layer_num_sums = 0
            # Initialize an empty list for storing sum-input-sizes of sums
            # modelled in the layer node
            num_or_size_sums = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # TODO: To be replaced with node.num_sums once AbstractSums
                # class is introduced
                # No. of sums modelled by the current node
                node_num_sums = (1 if isinstance(node, Sum) else node.num_sums)
                # Add Input values of the current node to the SumsLayer node
                sums_layer.add_values(*node.values * node_num_sums)
                # Add sum-input-size, of each sum modelled in the current node,
                # to the list
                num_or_size_sums += [sum(node.get_input_sizes()[2:])
                                     ] * node_num_sums
                # Visit each parent of the current node
                for parent in parents[node]:
                    try:
                        # 'Values' in case parent is an Op node
                        values = list(parent.values)
                    except AttributeError:
                        # 'Inputs' in case parent is a Concat node
                        values = list(parent.inputs)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check if it has indices
                            if value.indices is not None:
                                # If so, then just add the num-sums of the
                                # layer-op as offset
                                indices = (np.asarray(value.indices) +
                                           layer_num_sums).tolist()
                            else:
                                # If not, then create a list accordingly
                                indices = list(
                                    range(layer_num_sums,
                                          (layer_num_sums + node_num_sums)))
                            # Replace previous (node) Input value in the
                            # current parent node, with the new layer-node value
                            values[i] = (sums_layer, indices)
                            break  # Once child-node found, don't have to search further
                    # Reset values of the current parent node, by including
                    # the new child (Layer-node)
                    try:
                        # set 'values' in case parent is an Op node
                        parent.set_values(*values)
                    except AttributeError:
                        # set 'inputs' in case parent is a Concat node
                        parent.set_inputs(*values)
                # Increment num-sums-counter of the layer-node
                layer_num_sums += node_num_sums
                # Disconnect
                node.disconnect_inputs()

            # After all nodes at a certain depth are modelled into a Layer-node,
            # set num-sums parameter accordingly
            sums_layer.set_sum_sizes(num_or_size_sums)
        elif isinstance(depths[depth][0],
                        (Product, PermuteProducts)):  # A Products Layer
            with tf.name_scope("Layer%s" % depth):
                prods_layer = ProductsLayer(name="ProductsLayer-%s.%s" %
                                            (depth, 1))
            # Initialize a counter for keeping track of number of prods
            # modelled in the layer node
            layer_num_prods = 0
            # Initialize an empty list for storing prod-input-sizes of prods
            # modelled in the layer node
            num_or_size_prods = []
            # Iterate through each node at the current depth of the SPN
            for node in depths[depth]:
                # Get input values and sizes of the product node
                input_values = list(node.values)
                input_sizes = list(node.get_input_sizes())
                if isinstance(node, PermuteProducts):
                    # Permute over input-values to model permuted products
                    input_values = permute_inputs(input_values, input_sizes)
                    node_num_prods = node.num_prods
                    prod_input_size = len(input_values) // node_num_prods
                elif isinstance(node, Product):
                    node_num_prods = 1
                    prod_input_size = int(sum(input_sizes))

                # Add Input values of the current node to the ProductsLayer node
                prods_layer.add_values(*input_values)
                # Add prod-input-size, of each product modelled in the current
                # node, to the list
                num_or_size_prods += [prod_input_size] * node_num_prods
                # Visit each parent of the current node
                for parent in parents[node]:
                    values = list(parent.values)
                    # Iterate through each input value of the current parent node
                    for i, value in enumerate(values):
                        # If the value is the current node
                        if value.node == node:
                            # Check if it has indices
                            if value.indices is not None:
                                # If so, then just add the num-prods of the
                                # layer-op as offset
                                indices = value.indices + layer_num_prods
                            else:
                                # If not, then create a list accordingly
                                indices = list(
                                    range(layer_num_prods,
                                          (layer_num_prods + node_num_prods)))
                            # Replace previous (node) Input value in the
                            # current parent node, with the new layer-node value
                            values[i] = (prods_layer, indices)
                    # Reset values of the current parent node, by including
                    # the new child (Layer-node)
                    parent.set_values(*values)
                # Increment num-prods-counter of the layer node
                layer_num_prods += node_num_prods
                # Disconnect
                node.disconnect_inputs()

            # After all nodes at a certain depth are modelled into a Layer-node,
            # set num-prods parameter accordingly
            prods_layer.set_prod_sizes(num_or_size_prods)

        elif isinstance(depths[depth][0],
                        (SumsLayer, ProductsLayer, Concat)):  # A Concat node
            pass
        else:
            raise StructureError("Unknown node-type: {}".format(
                depths[depth][0]))

    return root
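The permute_inputs helper boils down to itertools.product followed by chain; the same pattern shown with plain (name, index) tuples in place of SPN inputs:

from itertools import chain, product

inputs = [[('A', 2), ('A', 5)], [('B', 0), ('B', 1), ('B', 2)]]
permuted_inputs = list(chain(*product(*inputs)))
# [('A', 2), ('B', 0), ('A', 2), ('B', 1), ('A', 2), ('B', 2),
#  ('A', 5), ('B', 0), ('A', 5), ('B', 1), ('A', 5), ('B', 2)]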
Example #29
def proba_motion_analysis(data_x_gaia,
                          motions=None,
                          x_lim=(0.3, 1),
                          step=0.004,
                          mean_y_lines=None):
    motions = ['parallax'] if motions is None else motions
    mu_dict, sigma_dict, median_dict, error_dict = defaultdict(
        list), defaultdict(list), defaultdict(list), defaultdict(list)

    # Get QSOs
    qso_x_gaia = data_x_gaia.loc[data_x_gaia['CLASS_PHOTO'] == 'QSO']

    # Limit QSOs to proba thresholds
    thresholds = np.arange(x_lim[0], x_lim[1], step)
    for thr in thresholds:
        qso_x_gaia_limited = qso_x_gaia.loc[qso_x_gaia['QSO_PHOTO'] >= thr]

        for motion in motions:
            # Get stats
            (mu, sigma) = stats.norm.fit(qso_x_gaia_limited[motion])
            median = np.median(qso_x_gaia_limited[motion])
            error = sigma / math.sqrt(qso_x_gaia_limited.shape[0])

            # Store values
            mu_dict[motion].append(mu)
            sigma_dict[motion].append(sigma)
            median_dict[motion].append(median)
            error_dict[motion].append(error)

    # Plot statistics
    to_plot = [((mu_dict, error_dict), 'mean'), (sigma_dict, 'sigma'),
               (median_dict, 'median')]
    color_palette = get_cubehelix_palette(len(motions))

    for t in to_plot:
        plt.figure()

        label = None
        for i, motion in enumerate(motions):
            if len(motions) != 1:
                label = motion

            if t[1] == 'mean':
                vals = t[0][0][motion]
                errors = t[0][1][motion]
            else:
                vals = t[0][motion]
                errors = None

            plt.plot(thresholds,
                     vals,
                     label=label,
                     color=color_palette[i],
                     linestyle=get_line_style(i))
            ax = plt.gca()
            if errors:
                lower = np.array(vals) - np.array(errors) / 2
                upper = np.array(vals) + np.array(errors) / 2
                ax.fill_between(thresholds,
                                lower,
                                upper,
                                color=color_palette[i],
                                alpha=0.2)

            if t[1] == 'mean' and mean_y_lines is not None:
                x_lim = ax.get_xlim()
                thr_x_lim = np.arange(x_lim[0], x_lim[1] + 0.01, 0.01)
                for line_name, y, y_err in mean_y_lines:
                    plt.axhline(y, linestyle='--', color='b')
                    ax.fill_between(thr_x_lim,
                                    y - y_err / 2,
                                    y + y_err / 2,
                                    color='b',
                                    alpha=0.2)
                    plt.text(
                        thresholds[0] +
                        0.01 * abs(max(thresholds) - min(thresholds)),
                        y + 0.06 * abs(max(vals) - min(vals)), line_name)
                ax.set_xlim(x_lim)

            plt.xlabel('minimum classification probability')
            plt.ylabel('{} parallax {}'.format(t[1], '[mas]'))

        if label:
            plt.legend(framealpha=1.0)
Example #30
 def __init__(self):
     super().__init__()
     self.operations: DefaultDict[UpdateOperation,
                                  List[str]] = defaultdict(list)
Example #31
    def featurize(self, struct, idx):
        """
        Get crystal fingerprint of site with given index in input
        structure.
        Args:
            struct (Structure): Pymatgen Structure object.
            idx (int): index of target site in structure.
        Returns:
            list of weighted order parameters of target site.
        """

        cn_fingerprint_array = defaultdict(
            list)  # dict where key = CN, val is array that contains each OP for that CN
        total_weight = math.pi / 4  # 1/4 unit circle area

        target = None
        if self.cation_anion:
            target = []
            m_oxi = struct[idx].specie.oxi_state
            for site in struct:
                if site.specie.oxi_state * m_oxi <= 0:  # opposite charge
                    target.append(site.specie)
            if not target:
                raise ValueError(
                    "No valid targets for site within cation_anion constraint!")

        # Use a Voronoi tessellation to identify neighbors of this site
        vnn = VoronoiNN(cutoff=self.cutoff_radius,
                        targets=target)
        n_w = get_nearest_neighbors(vnn, struct, idx)

        # Convert nn info to just a dict of neighbor -> weight
        n_w = dict((x['site'], x['weight']) for x in n_w)

        dist_sorted = (sorted(n_w.values(), reverse=True))

        if self.override_cn1:
            cn1 = 1
            for d in dist_sorted[1:]:
                cn1 = cn1 * (dist_sorted[0] ** 2 - d ** 2) / dist_sorted[0] ** 2
            cn_fingerprint_array[1] = [round(cn1, 6)]
            dist_sorted[0] = dist_sorted[1]

        dist_norm = [d / dist_sorted[0] for d in dist_sorted if d > 0]

        dist_bins = []  # bin numerical tolerances (~error bar of measurement)
        for d in dist_norm:
            if not dist_bins or (
                    d > self.tol and dist_bins[-1] / (1 + self.tol) > d):
                dist_bins.append(d)

        for dist_idx, dist in enumerate(dist_bins):
            neigh_sites = [n for n, w in n_w.items() if
                           w > 0 and w / dist_sorted[0] >= dist / (
                                   1 + self.tol)]
            cn = len(neigh_sites)
            if cn in self.ops:
                for opidx, op in enumerate(self.ops[cn]):
                    if self.optypes[cn][opidx] == "wt":
                        opval = 1
                    else:
                        opval = op.get_order_parameters(
                            [struct[idx]] + neigh_sites, 0,
                            indices_neighs=[i for i in range(1, len(neigh_sites) + 1)])[0]

                    opval = opval or 0  # handles None

                    # figure out the weight for this opval based on semicircle integration method
                    x1 = 1 - dist
                    x2 = 1 if dist_idx == len(dist_bins) - 1 else \
                        1 - dist_bins[dist_idx + 1]
                    weight = self._semicircle_integral(x2) - \
                             self._semicircle_integral(x1)

                    opval = opval * weight / total_weight

                    cn_fingerprint_array[cn].append(opval)

        # convert dict to list
        cn_fingerprint = []
        for cn in sorted(self.optypes):
            for op_idx, _ in enumerate(self.optypes[cn]):
                try:
                    cn_fingerprint.append(cn_fingerprint_array[cn][op_idx])
                except IndexError:  # no OP value computed
                    cn_fingerprint.append(0)

        return cn_fingerprint
Example #32
    def __init__(self, defaults: Sequence[Tuple[Union[str, Tuple], object]], seconds=60,
                 callables: List[DeltaExecutable] = (('quantity', lambda x: len(x)), ('volume_total', lambda x: sum(x))),
                 starting_moment: datetime.datetime = None, **kwargs):

        super().__init__(f'delta-{seconds}', defaults, callables, seconds, starting_moment, **kwargs)
        self._time_storage = defaultdict(deque)