Code Example #1
File: molpro2.py Project: jromerofontalvo/psi4public
    def muster_basis_options(self):
        text = ""
        lowername = self.method.lower()
        options = defaultdict(lambda: defaultdict(dict))

        options["BASIS"]["ORBITAL"]["value"] = self.basis

        if self.method in ["ccsd(t)-f12-optri"]:
            if self.basis == "cc-pvdz-f12":
                options["BASIS"]["JKFIT"]["value"] = "aug-cc-pvtz/jkfit"
                options["BASIS"]["JKFITC"]["value"] = self.basis + "/optri"
                options["BASIS"]["MP2FIT"]["value"] = "aug-cc-pvtz/mp2fit"
        elif (
            ("df-" in self.method)
            or ("f12" in self.method)
            or (self.method in ["mp2c", "dft-sapt", "dft-sapt-pbe0acalda"])
        ):
            if self.unaugbasis and self.auxbasis:
                options["BASIS"]["JKFIT"]["value"] = self.auxbasis + "/jkfit"
                options["BASIS"]["JKFITB"]["value"] = self.unaugbasis + "/jkfit"
                options["BASIS"]["MP2FIT"]["value"] = self.auxbasis + "/mp2fit"
                options["BASIS"]["DFLHF"]["value"] = self.auxbasis + "/jkfit"
            else:
                raise ValidationError("""Auxiliary basis not predictable from orbital basis '%s'""" % (self.basis))
        return text, options
Code Example #2
File: scheduler.py Project: TC01/cocotb
    def __init__(self):

        self.log = SimLog("cocotb.scheduler")
        if _debug:
            self.log.setLevel(logging.DEBUG)

        # A dictionary of pending coroutines for each trigger,
        # indexed by trigger
        self._trigger2coros = collections.defaultdict(list)

        # A dictionary of pending triggers for each coroutine, indexed by coro
        self._coro2triggers = collections.defaultdict(list)

        # Our main state
        self._mode = Scheduler._MODE_NORMAL

        # A dictionary of pending writes
        self._writes = {}

        self._pending_coros = []
        self._pending_callbacks = []
        self._pending_triggers = []
        self._pending_threads = []
        self._pending_events = []   # Events we need to call set on once we've unwound

        self._terminate = False
        self._test_result = None
        self._entrypoint = None
        self._main_thread = threading.current_thread()

        # Select the appropriate scheduling algorithm for this simulator
        self.advance = self.default_scheduling_algorithm
        self._is_reacting = False
Code Example #3
File: views.py Project: dupuy/scrumbugz
 def get_context_data(self, **kwargs):
     context = super(BugmailStatsView, self).get_context_data(**kwargs)
     json_stats = cache.get(self.cache_key)
     if not json_stats:
         wks_ago = (now() - timedelta(days=14)).date()
         stats = BugmailStat.objects.stats_for_range(wks_ago)
         stats_dict = {
             BugmailStat.TOTAL: defaultdict(int),
             BugmailStat.USED: defaultdict(int),
         }
         for s in stats:
             stats_dict[s.stat_type][date_to_js(s.date)] += s.count
         all_stats = {
             'total': [],
             'used': [],
             'x_axis': [],
         }
         stats_total = stats_dict[BugmailStat.TOTAL]
         stats_used = stats_dict[BugmailStat.USED]
         for d in date_range(wks_ago):
             d = date_to_js(d)
             all_stats['x_axis'].append(d)
             all_stats['total'].append([d, stats_total[d]])
             all_stats['used'].append([d, stats_used[d]])
         json_stats = json.dumps(all_stats)
         cache.set(self.cache_key, json_stats, 1800)  # 30 minutes
     context['stats'] = json_stats
     return context
Code Example #4
def bench_b(power_list):

    n_samples, n_features = 1000, 10000
    data_params = {'n_samples': n_samples, 'n_features': n_features,
                   'tail_strength': .7, 'random_state': random_state}
    dataset_name = "low rank matrix %d x %d" % (n_samples, n_features)
    ranks = [10, 50, 100]

    if enable_spectral_norm:
        all_spectral = defaultdict(list)
    all_frobenius = defaultdict(list)
    for rank in ranks:
        X = make_low_rank_matrix(effective_rank=rank, **data_params)
        if enable_spectral_norm:
            X_spectral_norm = norm_diff(X, norm=2, msg=False)
        X_fro_norm = norm_diff(X, norm='fro', msg=False)

        for n_comp in [np.int(rank/2), rank, rank*2]:
            label = "rank=%d, n_comp=%d" % (rank, n_comp)
            print(label)
            for pi in power_list:
                U, s, V, _ = svd_timing(X, n_comp, n_iter=pi, n_oversamples=2,
                                        power_iteration_normalizer='LU')
                if enable_spectral_norm:
                    A = U.dot(np.diag(s).dot(V))
                    all_spectral[label].append(norm_diff(X - A, norm=2) /
                                               X_spectral_norm)
                f = scalable_frobenius_norm_discrepancy(X, U, s, V)
                all_frobenius[label].append(f / X_fro_norm)

    if enable_spectral_norm:
        title = "%s: spectral norm diff vs n power iteration" % (dataset_name)
        plot_power_iter_vs_s(power_iter, all_spectral, title)
    title = "%s: frobenius norm diff vs n power iteration" % (dataset_name)
    plot_power_iter_vs_s(power_iter, all_frobenius, title)
Code Example #5
File: scheduler.py Project: TC01/cocotb
    def default_scheduling_algorithm(self):
        """
        Decide whether we need to schedule our own triggers (if at all) in
        order to progress to the next mode.

        This algorithm has been tested against the following simulators:
            Icarus Verilog
        """
        if not self._terminate and self._writes:

            if self._mode == Scheduler._MODE_NORMAL:
                if not self._readwrite.primed:
                    self._readwrite.prime(self.react)
            elif not self._next_timestep.primed:
                self._next_timestep.prime(self.react)

        elif self._terminate:
            if _debug:
                self.log.debug("Test terminating, scheduling Timer")

            for t in self._trigger2coros:
                t.unprime()

            for t in [self._readwrite, self._readonly, self._next_timestep,
                      self._timer1, self._timer0]:
                if t.primed:
                    t.unprime()

            self._timer1.prime(self.begin_test)
            self._trigger2coros = collections.defaultdict(list)
            self._coro2triggers = collections.defaultdict(list)
            self._terminate = False
            self._mode = Scheduler._MODE_TERM
Code Example #6
def getTrainingContextData():
    
    training_data = OrderedDict()
    
    #Initialising the xml parser for the training and test set
    training_root = initializeXMLParser(dir_path+training_file) 
    
    #Grabbing one word type at a time
    for word_type_xml in training_root:
        word_type = word_type_xml.attrib['item']
        training_data[word_type] = defaultdict(lambda: defaultdict(dict))
        
        #Grabbing the instance id and its list of senses
        for word_instance in word_type_xml:
            instance = word_instance.attrib['id']
            senses   = [answer.attrib['senseid'] for answer in word_instance.findall('answer')]
            pre_context  = word_instance.find('context').text.split()
            post_context = word_instance.find('context').find('head').tail.split()
            
            #Pre-processing the pre-context and post context
            #TODO: Check why this is reducing the accuracy of the model by 1%
            pre_context = preProcessContextData(pre_context)
            post_context = preProcessContextData(post_context)
            
            training_data[word_type]['training'][instance] = {"Sense":senses, "Pre-Context":pre_context, "Post-Context":post_context }
        
        #break;#TODO: Remove this breakpoint. Only testing for one word type right now
    return training_data
Code Example #7
File: add.py Project: ENuge/sympy
    def as_coefficients_dict(a):
        """Return a dictionary mapping terms to their Rational coefficient.
        Since the dictionary is a defaultdict, inquiries about terms which
        were not present will return a coefficient of 0. If an expression is
        not an Add it is considered to have a single term.

        Examples
        ========

        >>> from sympy.abc import a, x
        >>> (3*x + a*x + 4).as_coefficients_dict()
        {1: 4, x: 3, a*x: 1}
        >>> _[a]
        0
        >>> (3*a*x).as_coefficients_dict()
        {a*x: 3}
        """

        d = defaultdict(list)
        for ai in a.args:
            c, m = ai.as_coeff_Mul()
            d[m].append(c)
        for k, v in d.iteritems():
            if len(v) == 1:
                d[k] = v[0]
            else:
                d[k] = Add(*v)
        di = defaultdict(int)
        di.update(d)
        return di
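
A minimal stdlib sketch (not from the sympy source) of the behaviour the docstring above describes: once the result is a defaultdict(int), querying a term that is not present returns 0 instead of raising KeyError. String keys stand in for sympy terms here.

from collections import defaultdict

coeffs = defaultdict(int, {'x': 3, 'a*x': 1, 1: 4})   # illustrative stand-in terms
print(coeffs['y'])   # 0 -- a missing "term" reports a zero coefficient
print(coeffs['x'])   # 3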
Code Example #8
File: ngram_kneser_ney.py Project: dounan/tweeter
  def __init__(self, max_n):
    """
    max_n must be greater than or equal to 2.
    """
    self._max_n = max_n

    # Maps {n: {ngram_prefix: word_counts}}
    #    ngram_prefix is a tuple of words.
    #    word_counts is a Counter of word to count.
    self._ngram_word_counts_map = {}

    # Used to calculate the continuation counts.
    # For each n, maps a word to a set of ngram_prefix that precede it.
    # Maps {n: {word: set(ngram_prefix)}}
    self._continuations_map = {}

    # Used to normalize continuation counts into a probability.
    # Maps {n: set(ngram)}
    self._ngrams_map = {}

    # Maps {order: discount}
    # TODO(dounanshi): calculate discount http://www.riacs.edu/research/technical_reports/TR_pdf/TR_00.07.pdf
    self._discount_map = {1: .75, 2: .75, 3: .75}

    # Initialize maps.
    for i in range(max_n):
      n = i + 1
      self._ngram_word_counts_map[n] = defaultdict(Counter)
      self._continuations_map[n] = defaultdict(set)
      self._ngrams_map[n] = set()

    # Maps {ngram_prefix: count}
    self._prefix_count_cache = {}
    # Maps {ngram_prefix: (n1, n2, n3)}
    self._nvals_cache = {}
Code Example #9
File: main.py Project: barneycarroll/Skyperious
def run_merge(filenames):
    """Merges all Skype databases to a new database."""
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    db_base = dbs.pop()
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    postbacks = Queue.Queue()
    postfunc = lambda r: postbacks.put(r)
    worker = workers.MergeThread(postfunc)

    name, ext = os.path.splitext(os.path.split(db_base.filename)[-1])
    now = datetime.datetime.now().strftime("%Y%m%d")
    filename_final = util.unique_path("%s.merged.%s%s" %  (name, now, ext))
    print("Creating %s, using %s as base." % (filename_final, db_base))
    shutil.copyfile(db_base.filename, filename_final)
    db2 = skypedata.SkypeDatabase(filename_final)
    chats2 = db2.get_conversations()
    db2.get_conversations_stats(chats2)

    for db1 in dbs:
        chats = db1.get_conversations()
        db1.get_conversations_stats(chats)
        bar_total = sum(c["message_count"] for c in chats)
        bar_text = " Processing %.*s.." % (30, db1)
        bar = ProgressBar(max=bar_total, afterword=bar_text)
        bar.start()
        args = {"db1": db1, "db2": db2, "chats": chats,
                "type": "diff_merge_left"}
        worker.work(args)
        while True:
            result = postbacks.get()
            if "error" in result:
                print("Error merging %s:\n\n%s" % (db1, result["error"]))
                worker = None # Signal for global break
                break # break while True
            if "done" in result:
                break # break while True
            if "diff" in result:
                counts[db1]["chats"] += 1
                counts[db1]["msgs"] += len(result["diff"]["messages"])
                msgcounts = sum(c["message_count"] for c in result["chats"])
                bar.update(bar.value + msgcounts)
            if result["output"]:
                log(result["output"])
        if not worker:
            break # break for db1 in dbs
        bar.stop()
        bar.afterword = " Processed %s." % db1
        bar.update(bar_total)
        print

    if not counts:
        print("Nothing new to merge.")
        db2.close()
        os.unlink(filename_final)
    else:
        for db1 in dbs:
            print("Merged %s in %s from %s." %
                  (util.plural("message", counts[db1]["msgs"]),
                   util.plural("chat", counts[db1]["chats"]), db1))
        print("Merge into %s complete." % db2)
Code Example #10
File: replay.py Project: BBLN/sc2reader
def APMTracker(replay):
    """
    Builds ``player.aps`` and ``player.apm`` dictionaries where an action is
    any Selection, Hotkey, or Ability event.

    Also provides ``player.avg_apm`` which is defined as the sum of all the
    above actions divided by the number of seconds played by the player (not
    necessarily the whole game) multiplied by 60.
    """
    for player in replay.players:
        player.aps = defaultdict(int)
        player.apm = defaultdict(int)
        player.seconds_played = replay.length.seconds

        for event in player.events:
            if event.name == 'SelectionEvent' or 'AbilityEvent' in event.name or 'ControlGroup' in event.name:
                player.aps[event.second] += 1
                player.apm[int(event.second/60)] += 1

            elif event.name == 'PlayerLeaveEvent':
                player.seconds_played = event.second

        if len(player.apm) > 0:
            player.avg_apm = sum(player.aps.values())/float(player.seconds_played)*60
        else:
            player.avg_apm = 0

    return replay
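
A tiny numeric illustration (numbers invented) of the avg_apm formula the docstring above describes: total actions divided by seconds played, multiplied by 60.

total_actions = 300      # hypothetical sum of player.aps values
seconds_played = 600     # hypothetical seconds actually played
avg_apm = total_actions / float(seconds_played) * 60
print(avg_apm)           # 30.0 actions per minute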
Code Example #11
File: basepack.py Project: davidshepherd7/dotfiles
    def _getavailablepackfiles(self):
        """For each pack file (a index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = defaultdict(lambda: 0)
        mtimes = defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (os.path.join(self.path, id), max(mtimes[id]),
                            sizes[id])
                    else:
                        ids.add(id)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise
Code Example #12
File: scheduler.py Project: Wajih-O/luigi
 def worker_list(self, include_running=True, **kwargs):
     self.prune()
     workers = [
         dict(
             name=worker.id,
             last_active=worker.last_active,
             started=getattr(worker, 'started', None),
             **worker.info
         ) for worker in self._state.get_active_workers()]
     workers.sort(key=lambda worker: worker['started'], reverse=True)
     if include_running:
         running = collections.defaultdict(dict)
         num_pending = collections.defaultdict(int)
         num_uniques = collections.defaultdict(int)
         for task in self._state.get_pending_tasks():
             if task.status == RUNNING and task.worker_running:
                 running[task.worker_running][task.id] = self._serialize_task(task.id, False)
             elif task.status == PENDING:
                 for worker in task.workers:
                     num_pending[worker] += 1
                 if len(task.workers) == 1:
                     num_uniques[list(task.workers)[0]] += 1
         for worker in workers:
             tasks = running[worker['name']]
             worker['num_running'] = len(tasks)
             worker['num_pending'] = num_pending[worker['name']]
             worker['num_uniques'] = num_uniques[worker['name']]
             worker['running'] = tasks
     return workers
Code Example #13
File: dailrclassifier.py Project: UFAL-DSG/alex
    def extract_classifiers(self, das, utterances, verbose=False):
        # process the training data
        self.utterances = utterances
        self.das = das

        self.utterances_list = self.utterances.keys()

        self.utterance_fvc = {}
        self.das_abstracted = {}
        self.das_category_labels = {}
        for utt_idx in self.utterances_list:
            self.utterances[utt_idx] = self.preprocessing.normalise(self.utterances[utt_idx])
            self.utterance_fvc[utt_idx] = self.get_fvc(self.utterances[utt_idx])
            self.das_abstracted[utt_idx], self.das_category_labels[utt_idx] = \
                self.get_abstract_da(self.das[utt_idx],self.utterance_fvc[utt_idx])

        # get the classifiers
        self.classifiers = defaultdict(int)

        for k in self.utterances_list:
            for dai in self.das_abstracted[k].dais:
                self.classifiers[unicode(dai)] += 1

                if verbose:
                    if dai.value and 'CL_' not in dai.value:
                        print '=' * 120
                        print 'Un-abstracted category label value'
                        print '-' * 120
                        print unicode(self.utterances[k])
                        print unicode(self.utterance_fvc[k])
                        print unicode(self.das[k])
                        print unicode(self.das_abstracted[k])
Code Example #14
File: dailrclassifier.py Project: UFAL-DSG/alex
    def prune_features(self, clser, min_pos_feature_count, min_neg_feature_count, verbose=False):
        if verbose:
            print 'Pruning the features'
            print

        features_counts = defaultdict(int)
        for feat in self.classifiers_features[clser]:
            for f in feat:
                features_counts[f] += 1

        if verbose:
            print "  Number of features: ", len(features_counts)


        features_counts = defaultdict(lambda: [0, 0])
        for feat, output in zip(self.classifiers_features[clser], self.classifiers_outputs[clser]):
            output = 0 if output < 0.5 else 1

            for f in feat:
                features_counts[f][output] += 1

        remove_features = []
        for f in features_counts:
            negative, positive = features_counts[f]

            if positive >= min_pos_feature_count + len(f):
                # keep it
                continue


            if negative >= min_neg_feature_count + len(f):
                # keep it
                continue

            # remove the feature since it does not meet the criteria
            remove_features.append(f)

        if verbose:
            print "  Number of features occurring less than %d positive times and %d negative times: %d" % \
                  (min_pos_feature_count, min_neg_feature_count, len(remove_features))

        remove_features = set(remove_features)
        for feat in self.classifiers_features[clser]:
            feat.prune(remove_features)


        # count the features again and report the result
        features_counts = defaultdict(int)
        for feat in self.classifiers_features[clser]:
            for f in feat:
                features_counts[f] += 1

        self.classifiers_features_list[clser] = features_counts.keys()

        self.classifiers_features_mapping[clser] = {}
        for i, f in enumerate(self.classifiers_features_list[clser]):
            self.classifiers_features_mapping[clser][f] = i

        if verbose:
            print "  Number of features after pruning: ", len(features_counts)
Code Example #15
File: tools.py Project: richtier/imhotep
    def invoke(self, dirname, filenames=set(), linter_configs=set()):
        """
        Main entrypoint for all plugins.

        Returns results in the format of:

        {'filename': {
          'line_number': [
            'error1',
            'error2'
            ]
          }
        }

        """
        retval = defaultdict(lambda: defaultdict(list))
        extensions = ' -o '.join(['-name "*%s"' % ext for ext in
                                  self.get_file_extensions()])

        cmd = 'find %s %s | xargs %s' % (
            dirname, extensions, self.get_command(
                dirname,
                linter_configs=linter_configs))
        result = self.executor(cmd)
        for line in result.split('\n'):
            output = self.process_line(dirname, line)
            if output is not None:
                filename, lineno, messages = output
                if filename.startswith(dirname):
                    filename = filename[len(dirname) + 1:]
                retval[filename][lineno].append(messages)
        return retval
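
A minimal sketch of the nested result shape documented in the docstring above (file name and line number invented): the double defaultdict lets messages be appended without pre-creating the file or line keys.

from collections import defaultdict

retval = defaultdict(lambda: defaultdict(list))
retval['foo.py']['12'].append('error1')   # hypothetical filename and line number
retval['foo.py']['12'].append('error2')
print(dict(retval['foo.py']))             # {'12': ['error1', 'error2']}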
Code Example #16
def _Symbolize(input):
  asan_libs = _FindASanLibraries()
  libraries = collections.defaultdict(list)
  asan_lines = []
  for asan_log_line in [a.strip() for a in input]:
    m = _ParseAsanLogLine(asan_log_line)
    if m:
      libraries[m['library']].append(m)
    asan_lines.append({'raw_log': asan_log_line, 'parsed': m})

  all_symbols = collections.defaultdict(dict)
  original_symbols_dir = symbol.SYMBOLS_DIR
  for library, items in libraries.iteritems():
    libname = _TranslateLibPath(library, asan_libs)
    lib_relative_addrs = set([i['rel_address'] for i in items])
    info_dict = symbol.SymbolInformationForSet(libname,
                                               lib_relative_addrs,
                                               True)
    if info_dict:
      all_symbols[library]['symbols'] = info_dict

  for asan_log_line in asan_lines:
    m = asan_log_line['parsed']
    if not m:
      print asan_log_line['raw_log']
      continue
    if (m['library'] in all_symbols and
        m['rel_address'] in all_symbols[m['library']]['symbols']):
      s = all_symbols[m['library']]['symbols'][m['rel_address']][0]
      print s[0], s[1], s[2]
    else:
      print asan_log_line['raw_log']
Code Example #17
File: isostate.py Project: ianmackinnon/isostate
    def reload(self, data):
        self.data = data
        self.data = [row for row in self.data if row["lang"] == self.lang]

        self.by_name = {}
        for row in self.data:
            self.by_name[row["name"]] = {
                "iso2":row["iso2"],
                "sub":row["sub"],
                }

        def ngram_dict():
            return {
                "name": defaultdict(float),
                "total": 0.0
                }

        names = defaultdict(list)
        for row in self.data:
            iso2 = row["iso2"]
            name = row["name"]
            names[iso2].append(name)

        self.ngrams = defaultdict(ngram_dict)
        for iso2, name_list in list(names.items()):
            weight = 1.0 / len(name_list)
            for name in name_list:
                for ngram in text_to_ngrams(name, self.size):
                    self.ngrams[ngram]["name"][name] += weight
                    self.ngrams[ngram]["total"] += weight
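
A minimal sketch of the pattern used above (ngram and name invented): any zero-argument callable, not only a class or lambda, can serve as the default_factory, so each new key starts out as a small structured record.

from collections import defaultdict

def ngram_record():
    return {"name": defaultdict(float), "total": 0.0}

ngrams = defaultdict(ngram_record)
ngrams["al"]["name"]["Albania"] += 0.5   # hypothetical ngram and country name
ngrams["al"]["total"] += 0.5
print(ngrams["al"]["total"])             # 0.5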
Code Example #18
File: isostate.py Project: ianmackinnon/isostate
    def search_all(self, text):
        candidates = defaultdict(float)

        for ngram in text_to_ngrams(text, self.size):
            matches = self.ngrams.get(ngram, None)
            if not matches:
                continue
            total = matches["total"]
            for name, value in list(matches["name"].items()):
                candidates[name] += float(value) / total

        def score():
            return {
                "value": 0.0,
                }

        high_scores = defaultdict(score)

        for name, value in list(candidates.items()):
            row = self.by_name.get(name, None)
            key = row["iso2"]
            if row["sub"]:
                key += ">"
            if value > high_scores[key]["value"]:
                high_scores[key] = {
                    "iso2": row["iso2"],
                    "value": value,
                    "name": name,
                    "sub": row["sub"],
                }

        high_scores = sorted(high_scores.values(),
                             key=lambda x: x["value"], reverse=True)

        return high_scores
Code Example #19
    def findSubstring(self, s, words):
        """
        :type s: str
        :type words: List[str]
        :rtype: List[int]
        """
        result, m, n, k = [], len(s), len(words), len(words[0])
        if m < n*k:
            return result

        lookup = collections.defaultdict(int)
        for i in words:
            lookup[i] += 1                            # Space: O(n * k)

        for i in xrange(m+1-k*n):                     # Time: O(m)
            cur, j = collections.defaultdict(int), 0
            while j < n:                              # Time: O(n)
                word = s[i+j*k:i+j*k+k]               # Time: O(k)
                if word not in lookup:
                    break
                cur[word] += 1
                if cur[word] > lookup[word]:
                    break
                j += 1
            if j == n:
                result.append(i)

        return result
Code Example #20
    def generate_te_doping(self, d):
        types = ['p', 'n']
        target = 'seebeck_doping'  # root key for getting all temps, etc

        pf_dict = defaultdict(lambda: defaultdict(int))
        zt_dict = defaultdict(lambda: defaultdict(int))

        for type in types:
            for t in d[target][type]:  # temperatures
                outside_pf_array = []
                outside_zt_array = []
                for didx, tensor in enumerate(d[target][type][t]):  # doping idx
                    inside_pf_array = []
                    inside_zt_array = []
                    for tidx, val in enumerate(tensor):
                            seebeck = d['seebeck_doping'][type][t][didx][tidx]
                            cond = d['cond_doping'][type][t][didx][tidx]
                            kappa = d['kappa_doping'][type][t][didx][tidx]
                            inside_pf_array.append(seebeck*seebeck*cond)
                            inside_zt_array.append(seebeck*seebeck*cond*t/kappa)
                    outside_pf_array.append(inside_pf_array)
                    outside_zt_array.append(inside_zt_array)

                pf_dict[type][t] = outside_pf_array
                zt_dict[type][t] = outside_zt_array

        return pf_dict, zt_dict
Code Example #21
File: ajax.py Project: QuicNYC/rscript
def add2Tree(tree,id,name,floor,type):
    type = 'Type-'+type
    for i, btype in enumerate(tree['item']):
        if btype['id'] == type:
            ntype = btype
            break
    else:
        ntype = defaultdict()
        ntype['text'] = type
        ntype['id'] = type
        ntype['item'] = []
        tree['item'].append(ntype)
    floor = 'Floor-' + floor
    floorid = type + floor
    for i, bfloor in enumerate(ntype['item']):
        if bfloor['id'] == floorid:
            nfloor = bfloor
            break
    else:
        nfloor = defaultdict()
        nfloor['text'] = floor
        nfloor['id'] = floorid
        nfloor['item'] = []
        ntype['item'].append(nfloor)

    leaf = defaultdict()
    leaf['id']= str(id) + '-' + name
    leaf['text']= '.'.join(name.split('.')[3:])
    nfloor['item'].append(leaf)
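
Note on the snippet above (a stdlib fact, not something stated by the project): defaultdict() called with no arguments has default_factory set to None, so it behaves like a plain dict and still raises KeyError on missing keys; here it is simply filled in explicitly.

from collections import defaultdict

node = defaultdict()
node['text'] = 'Type-A'      # hypothetical value
print(node['text'])          # 'Type-A'
# node['missing']            # would raise KeyError -- no factory was supplied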
Code Example #22
File: diag.py Project: pothiers/tada
def metadata_catalog(fits_filenames):
    "Histogram the metadata values in list of fits files."
    
    common, optional = metadata_field_use(fits_filenames)
    allfields = optional.union(common)
    histo = collections.defaultdict(int)
    values = collections.defaultdict(set)
    for fname in fits_filenames:
        hdulist = pyfits.open(fname)
        hdr = hdulist[0].header
        for field in allfields:
            if field in hdr:
                histo[field] += 1
                values[field].add(str(hdr[field]))
        hdulist.close()

    print('\n', '~'*78)
    print('Histogram of field use:')
    pprint(histo)

    print('\n', '~'*78)
    
    max_unique = 0.80
    print('Values used (max %s unique values):'%(max_unique))
    #! pprint(values)
    for k,v in values.items():
        if float(len(v))/len(fits_filenames) > max_unique: continue
        print('%8s: %s'%(k,', '.join(v)))
Code Example #23
File: pipeline.py Project: dav009/ScalaFunctional
    def to_dict(self, default=None):
        """
        Converts sequence of (Key, Value) pairs to a dictionary.

        >>> type(seq([('a', 1)]).to_dict())
        dict

        >>> seq([('a', 1), ('b', 2)]).to_dict()
        {'a': 1, 'b': 2}

        :param default: Can be a callable zero argument function. When not None, the returned
            dictionary is a collections.defaultdict with default as value for missing keys. If the
            value is not callable, then a zero argument lambda function is created returning the
            value and used for collections.defaultdict
        :return: dictionary from sequence of (Key, Value) elements
        """
        dictionary = {}
        for e in self.sequence:
            dictionary[e[0]] = e[1]
        if default is None:
            return dictionary
        else:
            if hasattr(default, '__call__'):
                return collections.defaultdict(default, dictionary)
            else:
                return collections.defaultdict(lambda: default, dictionary)
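
A minimal stdlib sketch of the two branches described in the docstring above (keys and values invented): a callable default is handed straight to collections.defaultdict, while a plain value is wrapped in a zero-argument lambda.

from collections import defaultdict

data = {'a': 1, 'b': 2}
d_callable = defaultdict(list, data)       # default was already callable
d_value = defaultdict(lambda: 0, data)     # plain value 0 wrapped in a lambda
print(d_callable['missing'])               # []
print(d_value['missing'])                  # 0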
Code Example #24
File: graph_net.py Project: HimmelStein/lg-flask
    def __init__(self, ldg=None):
        DependencyGraph.__init__(self)
        self.nodes = defaultdict(lambda: {'address': None,
                                          'ldg': 0,
                                          'gid': 1, #has the same value of the gid of nodes in ldg.
                                          'lemma': None,
                                          'head': None,
                                          'deps': defaultdict(int),
                                          'remaining_ops': defaultdict(list), #list(LgGraph.operator_dic.keys()),
                                          'ctag': None,
                                          'tag': None,
                                          'feats': None,
                                          })
        self.git_list = [1]
        self.nodes[0].update(
                        {'address': 0,
                         'head': -1,
                         'ldg': 'TOP',
                         'gid': 1, #has the same value of the gid of nodes in ldg.
                         'remaining_ops': defaultdict(list),
                         }
                    )
        if isinstance(ldg, LgGraph):
            self.nodes[0]['ldg'] = ldg

        if isinstance(ldg, GraphNet):
            self.nodes = ldg
            self.git_list = ldg.get_git_list()
Code Example #25
File: traversal.py Project: vascotenner/holoviews
def hierarchical(keys):
    """
    Iterates over dimension values in keys, taking two sets
    of dimension values at a time to determine whether two
    consecutive dimensions have a one-to-many relationship.
    If they do a mapping between the first and second dimension
    values is returned. Returns a list of n-1 mappings, between
    consecutive dimensions.
    """
    ndims = len(keys[0])
    if ndims <= 1:
        return True
    dim_vals = list(zip(*keys))
    combinations = (zip(*dim_vals[i : i + 2]) for i in range(ndims - 1))
    hierarchies = []
    for combination in combinations:
        hierarchy = True
        store1 = defaultdict(list)
        store2 = defaultdict(list)
        for v1, v2 in combination:
            if v2 not in store2[v1]:
                store2[v1].append(v2)
            previous = store1[v2]
            if previous and previous[0] != v1:
                hierarchy = False
                break
            if v1 not in store1[v2]:
                store1[v2].append(v1)
        hierarchies.append(store2 if hierarchy else {})
    return hierarchies
Code Example #26
File: bot.py Project: rohan/groupme-markov
  def __init__(self, names, messages):
    self.names = names
    self.messages = messages
    # do some preanalysis

    # MBU: user_id -> [message]
    self.messages_by_user = defaultdict(list)

    # who has liked {{user}}'s messages?
    # user_id -> (liker -> count)
    self.likes_per_user = defaultdict(lambda : defaultdict(int))

    # who has {{user}} liked?
    # user_id -> (liked -> count)
    self.user_likes = defaultdict(lambda : defaultdict(int))

    # which words are used most often?
    # word -> (user_id -> count)
    self.most_common_words = defaultdict(lambda : defaultdict(int))

    # per user, which words are used most often?
    # user_id -> (word -> count)
    self.mcw_per_user = defaultdict(lambda : defaultdict(int))

    # which users have liked their own posts?
    # user_id -> count
    self.self_likes = defaultdict(int)

    for message in messages:
      self.read_message(message)
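
A minimal sketch of the nested-counter pattern set up above (user names invented): the outer defaultdict creates an inner defaultdict(int) on first access, so two-level counts can be incremented without any per-user setup.

from collections import defaultdict

likes_per_user = defaultdict(lambda: defaultdict(int))
likes_per_user['alice']['bob'] += 1
likes_per_user['alice']['bob'] += 1
likes_per_user['alice']['carol'] += 1
print(dict(likes_per_user['alice']))   # {'bob': 2, 'carol': 1}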
Code Example #27
File: bot.py Project: rohan/groupme-markov
  def __init__(self, k, messages):
    self.k = k
    # user_id -> (phrase -> [next words])
    self.m = defaultdict(lambda : defaultdict(list))

    for message in messages:
      self.read_message(message)
Code Example #28
  def __init__(self, analysis_files):
    # The analysis files we gather information from.
    self.analysis_files = analysis_files

    # Map from scala source files to the class files generated from that source
    self.products = defaultdict(set)

    # Map from scala sources to jar files they depend on. (And, rarely, class files.)
    self.binary_deps = defaultdict(set)

    # Map from scala sources to the source files providing the classes that they depend on
    # The set of source files here does *not* appear to include inheritance!
    # eg, in src/jvm/com/foursquare/api/util/BUILD:util,
    # in the source file ClientMetrics, class ClientView extends PrettyEnumeration, but
    # the file declaring PrettyEnumeration is *not* in the source deps.
    # But PrettyEnumeration *is* included in the list of classes in external_deps.
    self.source_deps = defaultdict(set)

    # Map from scala sources to the classes that they depend on. (Not class files or source files, but just classes.)
    self.external_deps = defaultdict(set)

    # Map from scala sources to the classes that they provide. (Again, not class files, fully-qualified class names.)
    self.class_names = defaultdict(set)

    for c in self.analysis_files:
      self.parse(c)
Code Example #29
File: views.py Project: akarim78/HighFly
def show_connections():
	user=User.query.filter_by(id=session['user_id']).first()
	if 'linkedin_token' in session:
		conns = linkedin.get('people/~/connections:(headline,id,first-name,last-name,location,industry,picture-url)')

	f = open('data.json', 'w')
	f.write(json.dumps(conns.data, indent=1))
	f.close()
	connections = json.loads(json.dumps(conns.data, indent=1))
    # Get an id for a connection. We'll just pick the first one.
	print len(connections['values'])
	index = 0
	all = list()
	categorized = defaultdict(list)
	countdata = defaultdict()
    
	for conn in connections['values']:
		try:
            #all.append()
			name = conn['firstName'].encode("utf-8")+' '+conn['lastName'].encode("utf-8")
			industry = conn['industry'].encode("utf-8")
			headline = conn['headline'].encode("utf-8")
			contact = (name, industry, headline)
            #conn['firstName'].encode("utf-8"), conn['lastName'].encode("utf-8"), conn['id'].encode("utf-8"), , conn['picture-url'].encode("utf-8"), conn['location'].encode("utf-8")
			all.append(contact)
			categorized[industry].append(contact)
		except KeyError: pass
		index = index+1

	for key in categorized:
		if len(categorized[key])*1000/index > 10 :
			countdata[key] = len(categorized[key])
			print key,countdata[key]
	return render_template("connections.html", title = 'Connections', all_conn=all, cat_conn=categorized, cat_count = countdata, user=user)
Code Example #30
 def edit_quantiles(self,q=.01,quantile_range=False,v=False,write=True):
     basic.log('creating edit quantiles %s' % self.lang)
     f_out = basic.create_dir('results/quantiles')
     df = pd.read_csv(self.db_path)
     df = self.drop_dups(df)
     df.page_id = df.page_id.astype(int)
     if self.drop1:
         df = df.loc[(df['len'] > 1)]
     q = np.arange(q,1+q,q)
     results = defaultdict(dict)
     for n in self.namespace:
         results[n] = defaultdict(dict)
         for r in self.revert:
             basic.log('%s %s %s' % (self.lang,n,r))
             if n == 'at':
                 result = df[r].quantile(q=q)
                 mean = df[r].mean()
             else:
                 result = df.loc[(df['namespace'] == self.namespace.index(n)),r].quantile(q=q)
                 #qcut = pd.qcut(df.loc[(df['namespace'] == self.namespace.index(n)),r],q)
                 #print(qcut)
                 mean = df.loc[(df['namespace'] == self.namespace.index(n)),r].mean()
             result = result.to_frame()
             column = '%s_%s_%s' % (self.lang,n,r)
             result.columns = [column]
             results[n][r] = {'quantiles':result,'mean':mean}
             if write:
                 result = result.append(DataFrame({column:result.loc[(result[column] < int(mean+1))].tail(1).index.values},index=['mean_quantile']))
                 result = result.append(DataFrame({column:mean},index=['mean_value']))
                 result.to_csv('%s/%s_%s_%s.csv' % (f_out,self.lang,n,r),encoding='utf-8',index_label='quantiles')
     return results
Code Example #31
from collections import defaultdict
from operator import itemgetter


def merge_token_in_report(sorted_token_id):
    token_id_freq = []
    for token, id in sorted_token_id:
        if token_id_freq:
            prev_tok, prev_id, prev_freq = token_id_freq[-1]
            if prev_tok == token and prev_id == id:
                token_id_freq[-1] = (token, id, prev_freq + 1)
            else:
                token_id_freq.append((token, id, 1))
        else:
            token_id_freq.append((token, id, 1))
    return token_id_freq


token_id_freq = merge_token_in_report(sorted_token_id)

dictionary = defaultdict(lambda: (0, 0))
postings = defaultdict(lambda: [])

#fill in dictionary
for token, id, freq in token_id_freq:
    dictionary[token] = (dictionary[token][0] + 1, dictionary[token][1] + freq)

#fill in postings
for token, id, freq in token_id_freq:
    postings[token].append((id, freq))

# Sort the postings
for key, values in postings.items():
    postings[key] = sorted(values, key=itemgetter(0))

Code Example #32
File: debug.py Project: numirias/qtile-plasma
 def __init__(self, width, height):
     self.width = width
     self.height = height
     self.canvas = defaultdict(lambda: defaultdict(lambda: '#'))
Code Example #33
File: ethernet.py Project: sigma-random/halucinator
class EthernetModel(object):

    frame_queues = defaultdict(deque)
    calc_crc = True
    rx_frame_isr = None
    rx_isr_enabled = False
    frame_times = defaultdict(deque)  # Used to record reception time

    @classmethod
    def enable_rx_isr(cls, interface_id):
        cls.rx_isr_enabled = True
        if cls.frame_queues[interface_id] and cls.rx_frame_isr is not None:
            Interrupts.trigger_interrupt(cls.rx_frame_isr, 'Ethernet_RX_Frame')

    @classmethod
    def disable_rx_isr(self, interface_id):
        EthernetModel.rx_isr_enabled = False

    @classmethod
    @peripheral_server.tx_msg
    def tx_frame(cls, interface_id, frame):
        '''
            Creates the message that Peripheral.tx_msg will send on this
            event
        '''
        print("Sending Frame (%i): " % len(frame), binascii.hexlify(frame))
        # print ""
        msg = {'interface_id': interface_id, 'frame': frame}
        return msg

    @classmethod
    @peripheral_server.reg_rx_handler
    def rx_frame(cls, msg):
        '''
            Processes reception of this type of message from 
            PeripheralServer.rx_msg
        '''
        interface_id = msg['interface_id']
        log.info("Adding Frame to: %s" % interface_id)
        frame = msg['frame']
        cls.frame_queues[interface_id].append(frame)
        cls.frame_times[interface_id].append(time.time())
        log.info("Adding Frame to: %s" % interface_id)
        if cls.rx_frame_isr is not None and cls.rx_isr_enabled:
            Interrupts.trigger_interrupt(cls.rx_frame_isr, 'Ethernet_RX_Frame')

    @classmethod
    def get_rx_frame(cls, interface_id, get_time=False):
        frame = None
        rx_time = None
        log.info("Checking for: %s" % str(interface_id))
        if cls.frame_queues[interface_id]:
            log.info("Returning frame")
            frame = cls.frame_queues[interface_id].popleft()
            rx_time = cls.frame_times[interface_id].popleft()

        if get_time:
            return frame, rx_time
        else:
            return frame

    @classmethod
    def get_frame_info(cls, interface_id):
        '''
            return number of frames and length of first frame
        '''
        queue = cls.frame_queues[interface_id]
        if queue:
            return len(queue), len(queue[0])
        return 0, 0
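
A minimal sketch of the class-level queue pattern above (interface id and payloads invented): frame_queues maps each interface id to a deque that is created on first use and drained FIFO.

from collections import defaultdict, deque

frame_queues = defaultdict(deque)
frame_queues['eth0'].append(b'frame-1')
frame_queues['eth0'].append(b'frame-2')
print(frame_queues['eth0'].popleft())   # b'frame-1'
print(len(frame_queues['eth0']))        # 1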
Code Example #34
from collections import defaultdict

N = int(input().strip())
W = sorted([int(x) for x in input().strip().split()])
toys = defaultdict(lambda: [])

price = 1
border_weight = W[0] + 4

for w in W:
    if border_weight < w:
        border_weight = w + 4
    else:
        pass

    toys[border_weight].append(w)

print(len(toys))
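
A hypothetical run of the grouping logic above with invented weights: every weight within 4 of the first weight of a group shares that group's border_weight key, so len(toys) is the number of price groups.

from collections import defaultdict

W = sorted([1, 2, 7, 9])          # assumed sample input
toys = defaultdict(lambda: [])
border_weight = W[0] + 4
for w in W:
    if border_weight < w:
        border_weight = w + 4
    toys[border_weight].append(w)
print(dict(toys))                 # {5: [1, 2], 11: [7, 9]}
print(len(toys))                  # 2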
Code Example #35
                for word in words:
                    uword = word.lower().replace('ё', 'е')
                    if uword not in vocabulary and not is_int(word) and word not in '. ? ! : - , — – ) ( " \' « » „ “ ; …'.split():
                        if uword not in oov_tokens:
                            wrt.write('Sample with oov-word "{}":\n'.format(word))
                            wrt.write('Question:        {}\n'.format(sample.question))
                            wrt.write('Short answer:    {}\n'.format(sample.short_answer))
                            wrt.write('Expanded answer: {}\n'.format(sample.expanded_answer))
                            wrt.write('\n\n')
                            oov_tokens.add(uword)
                            break

    # Build the morphological templates for the knn-1 interpretation model
    print('Building knn1 templates from {} samples...'.format(len(samples2)))
    templates2 = collections.Counter()
    packed2samples = collections.defaultdict(list)
    for sample in samples2:
        # DEBUGGING START
        #if 'зовут' not in sample.left or sample.short_phrase.lower() != 'илья':
        #    continue
        # DEBUGGING END

        context = [s.strip() for s in sample.left.split('|')] + [sample.short_phrase]
        expanded_tokens = lemmatizer.lemmatize(tagger.tag(tokenizer.tokenize(sample.expanded_phrase)))
        context_templates = [create_context_template(iline, line_str, expanded_tokens) for iline, line_str in enumerate(context)]
        if any((z is None) for z in context_templates):
            continue

        expanded_template = create_expanded_template(context_templates, expanded_tokens)

        # drop the lemmas from the context, since they were only needed for
Code Example #36
 def clear(self):
     """ Clears the record batch builder.
     """
     self.batch_ = defaultdict(list)
     self.batch_sizes_ = {}
Code Example #37
    def isEscapePossible(self, blocked: List[List[int]], source: List[int],
                         target: List[int]) -> bool:
        blocked_rows = defaultdict(list)
        bloked_cols = defaultdict(list)

        for row, col in sorted(blocked):
            blocked_rows[col].append(row)
            bloked_cols[row].append(col)

        target_col = target[1]
        target_row = target[0]

        visited = set()

        # directions
        RIGHT = 0
        DOWN = 1
        LEFT = 2
        TOP = 3

        def dfs(row, col):
            if row == target_row and col == target_col:
                return True

            for direction in RIGHT, DOWN, LEFT, TOP:
                next_row = row
                next_col = col
                if direction == RIGHT:
                    i = bisect_right(bloked_cols[row], col)

                    next_blocked_col = float('inf')
                    if i < len(bloked_cols[row]):
                        next_blocked_col = bloked_cols[row][i]

                    next_col = min(next_blocked_col - 1, target_col)
                elif direction == LEFT:
                    i = bisect_right(bloked_cols[row], col) - 1

                    next_blocked_col = -1
                    if i >= 0:
                        next_blocked_col = bloked_cols[row][i]

                    next_col = min(next_blocked_col + 1, target_col)
                elif direction == DOWN:
                    i = bisect_right(blocked_rows[col], row)

                    next_blocked_row = float('inf')
                    if i < len(blocked_rows[col]):
                        next_blocked_row = blocked_rows[col][i]

                    next_row = min(next_blocked_row - 1, target_row)
                elif direction == TOP:
                    i = bisect_right(blocked_rows[col], row) - 1

                    next_blocked_row = -1
                    if i >= 0:
                        next_blocked_row = blocked_rows[col][i]

                    next_row = min(next_blocked_row + 1, target_row)

                key = (next_row, next_col)

                if key in visited:
                    continue

                visited.add(key)

                if dfs(next_row, next_col):
                    return True

            return False

        return dfs(source[0], source[1])
Code Example #38
import csv  # used by csv.reader below
import re
from os.path import join
import argparse
from collections import defaultdict

parser = argparse.ArgumentParser()
parser.add_argument(
    "meta",
    help=
    "Required. the FULL path to the tab delimited meta file containing run info"
)
args = parser.parse_args()

assert args.meta is not None, "please provide the path to the meta file"

FILES = defaultdict(lambda: defaultdict(list))

with open(args.meta, "r") as f:
    reader = csv.reader(f, delimiter="\t")
    # skip the header
    header = next(reader)
    for row in reader:
        run_id = row[0].strip()
        flow_cell = row[1].strip()
        sample = row[2].strip()
        # This is name for the fastq folder
        batch_name = row[3].strip()
        csv_file = row[4].strip()
        ## now just assume the file name in the metafile contained in the fastq file path
        FILES[sample][batch_name].append(run_id)
        FILES[sample][batch_name].append(flow_cell)
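
A hypothetical illustration (values invented) of the FILES structure the loop above builds: for each sample and batch, the run id and then the flow cell are appended to the same list.

from collections import defaultdict

FILES = defaultdict(lambda: defaultdict(list))
FILES['sample_1']['batch_A'].append('run_001')
FILES['sample_1']['batch_A'].append('FC_1')
print(FILES['sample_1']['batch_A'])   # ['run_001', 'FC_1']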
Code Example #39
File: java_class.py Project: Vector35/kaitai
 def __init__(self, _io, _parent=None, _root=None):
     self._io = _io
     self._parent = _parent
     self._root = _root if _root else self
     self._debug = collections.defaultdict(dict)
Code Example #40
'''i/p: ['ashok', 'hari', 'bhanu', 'anil', 'bharath', 'anvesh', 'uday', 'raja']
o/p: {'a': ['ashok', 'anil', 'anvesh'], 'b': ['bhanu', 'bharath'], 'h': ['hari'], 
'u': ['uday'], 'r': ['raja']}'''

val = ['ashok', 'hari', 'bhanu', 'anil', 'bharath', 'anvesh', 'uday', 'raja']
d = {}

for i in val: # 'ashok'
    if i[0] not in d:
        d[i[0]] = []        #{'a': [], 'h':[]}
    d[i[0]].append(i)  #{'a': ['ashok'], 'h': ['hari']}

print(d)

from collections import defaultdict
df = defaultdict(set)
print(df)
for i in val:
    df[i[0]].add(i)
print(df)


a = [3, 4, 5, 6, 4, 6, 6, 4, 5]
'''o/p :{3:1, 4:3, 5:2, 6:3}'''
di = {}
#di = defaultdict(int)
for i in a:              #0 + 1
    #d[i] += 1  # d[i] = d[i] + 1
    di[i] = a.count(i)
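
A minimal sketch of the defaultdict(int) counting approach hinted at by the commented-out lines above; it produces the same o/p without calling list.count once per element.

from collections import defaultdict

a = [3, 4, 5, 6, 4, 6, 6, 4, 5]
di = defaultdict(int)
for i in a:
    di[i] += 1           # missing keys start at 0, so no KeyError
print(dict(di))          # {3: 1, 4: 3, 5: 2, 6: 3}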

Code Example #41
    def get_process_state(self, name, pids, try_sudo):
        st = defaultdict(list)

        # Remove from cache the processes that are not in `pids`
        cached_pids = set(self.process_cache[name].keys())
        pids_to_remove = cached_pids - pids
        for pid in pids_to_remove:
            del self.process_cache[name][pid]

        for pid in pids:
            st['pids'].append(pid)

            new_process = False
            # If the pid's process is not cached, retrieve it
            if pid not in self.process_cache[
                    name] or not self.process_cache[name][pid].is_running():
                new_process = True
                try:
                    self.process_cache[name][pid] = psutil.Process(pid)
                    self.log.debug('New process in cache: {}'.format(pid))
                # Skip processes dead in the meantime
                except psutil.NoSuchProcess:
                    self.warning(
                        'Process {} disappeared while scanning'.format(pid))
                    # reset the PID cache now, something changed
                    self.last_pid_cache_ts[name] = 0
                    continue

            p = self.process_cache[name][pid]

            meminfo = self.psutil_wrapper(p, 'memory_info', ['rss', 'vms'],
                                          try_sudo)
            st['rss'].append(meminfo.get('rss'))
            st['vms'].append(meminfo.get('vms'))

            mem_percent = self.psutil_wrapper(p, 'memory_percent', None,
                                              try_sudo)
            st['mem_pct'].append(mem_percent)

            # will fail on win32 and solaris
            shared_mem = self.psutil_wrapper(p, 'memory_info', ['shared'],
                                             try_sudo).get('shared')
            if shared_mem is not None and meminfo.get('rss') is not None:
                st['real'].append(meminfo['rss'] - shared_mem)
            else:
                st['real'].append(None)

            ctxinfo = self.psutil_wrapper(p, 'num_ctx_switches',
                                          ['voluntary', 'involuntary'],
                                          try_sudo)
            st['ctx_swtch_vol'].append(ctxinfo.get('voluntary'))
            st['ctx_swtch_invol'].append(ctxinfo.get('involuntary'))

            st['thr'].append(
                self.psutil_wrapper(p, 'num_threads', None, try_sudo))

            cpu_percent = self.psutil_wrapper(p, 'cpu_percent', None, try_sudo)
            cpu_count = psutil.cpu_count()
            if not new_process:
                # psutil returns `0.` for `cpu_percent` the
                # first time it's sampled on a process,
                # so save the value only on non-new processes
                st['cpu'].append(cpu_percent)
                if cpu_count > 0 and cpu_percent is not None:
                    st['cpu_norm'].append(cpu_percent / cpu_count)
                else:
                    self.log.debug(
                        'could not calculate the normalized cpu pct, cpu_count: {}'
                        .format(cpu_count))
            st['open_fd'].append(
                self.psutil_wrapper(p, 'num_fds', None, try_sudo))
            st['open_handle'].append(
                self.psutil_wrapper(p, 'num_handles', None, try_sudo))

            ioinfo = self.psutil_wrapper(
                p, 'io_counters',
                ['read_count', 'write_count', 'read_bytes', 'write_bytes'],
                try_sudo)
            st['r_count'].append(ioinfo.get('read_count'))
            st['w_count'].append(ioinfo.get('write_count'))
            st['r_bytes'].append(ioinfo.get('read_bytes'))
            st['w_bytes'].append(ioinfo.get('write_bytes'))

            pagefault_stats = self.get_pagefault_stats(pid)
            if pagefault_stats is not None:
                (minflt, cminflt, majflt, cmajflt) = pagefault_stats
                st['minflt'].append(minflt)
                st['cminflt'].append(cminflt)
                st['majflt'].append(majflt)
                st['cmajflt'].append(cmajflt)
            else:
                st['minflt'].append(None)
                st['cminflt'].append(None)
                st['majflt'].append(None)
                st['cmajflt'].append(None)

            # calculate process run time
            create_time = self.psutil_wrapper(p, 'create_time', None, try_sudo)
            if create_time is not None:
                now = time.time()
                run_time = now - create_time
                st['run_time'].append(run_time)

        return st
Code Example #42
import xml.etree.ElementTree as Tree
from collections import defaultdict  # used by tabdict below


lyp_attr   = ['layer', 'datatype', 'source', 'fill-color', 'frame-color',
    'frame-brightness', 'fill-brightness', 'dither-pattern', 'valid',
    'visible', 'transparent', 'width', 'marked', 'animation']
nazca_attr = ['layer', 'datatype', 'name', 'fill_color', 'frame_color',
    'frame_brightness', 'fill_brightness', 'dither_pattern', 'valid',
    'visible', 'transparent', 'width', 'marked', 'animation']


doPrint = False
#==============================================================================
# lyp2csv
#==============================================================================
tabdict = defaultdict(list)
depth=0
def __parse_properties(lev1, infolevel=0):
    """Parse lyp tags <properties> and <group-member> levels."""
    global tabdict
    global depth

    depth += 1
    tabdict['depth'].append(depth)
    for lev2 in lev1:
        tag = lev2.tag
        value = lev1.find(tag).text
        if infolevel > 2:
            if tag == 'group-members': # remove linefeed
                value = ''
            print("{}{}: {}".format('  '*depth, tag, value))
Code Example #43
    def __init__(self, config: FrigateConfig, client, topic_prefix,
                 tracked_objects_queue, event_queue, event_processed_queue,
                 stop_event):
        threading.Thread.__init__(self)
        self.name = "detected_frames_processor"
        self.config = config
        self.client = client
        self.topic_prefix = topic_prefix
        self.tracked_objects_queue = tracked_objects_queue
        self.event_queue = event_queue
        self.event_processed_queue = event_processed_queue
        self.stop_event = stop_event
        self.camera_states: Dict[str, CameraState] = {}
        self.frame_manager = SharedMemoryFrameManager()

        def start(camera, obj: TrackedObject, current_frame_time):
            self.event_queue.put(('start', camera, obj.to_dict()))

        def update(camera, obj: TrackedObject, current_frame_time):
            after = obj.to_dict()
            message = {
                'before': obj.previous,
                'after': after,
                'type': 'new' if obj.previous['false_positive'] else 'update'
            }
            self.client.publish(f"{self.topic_prefix}/events",
                                json.dumps(message),
                                retain=False)
            obj.previous = after

        def end(camera, obj: TrackedObject, current_frame_time):
            snapshot_config = self.config.cameras[camera].snapshots
            event_data = obj.to_dict(include_thumbnail=True)
            event_data['has_snapshot'] = False
            if not obj.false_positive:
                message = {
                    'before': obj.previous,
                    'after': obj.to_dict(),
                    'type': 'end'
                }
                self.client.publish(f"{self.topic_prefix}/events",
                                    json.dumps(message),
                                    retain=False)
                # write snapshot to disk if enabled
                if snapshot_config.enabled:
                    jpg_bytes = obj.get_jpg_bytes(
                        timestamp=snapshot_config.timestamp,
                        bounding_box=snapshot_config.bounding_box,
                        crop=snapshot_config.crop,
                        height=snapshot_config.height)
                    with open(
                            os.path.join(CLIPS_DIR,
                                         f"{camera}-{obj.obj_data['id']}.jpg"),
                            'wb') as j:
                        j.write(jpg_bytes)
                    event_data['has_snapshot'] = True
            self.event_queue.put(('end', camera, event_data))

        def snapshot(camera, obj: TrackedObject, current_frame_time):
            mqtt_config = self.config.cameras[camera].mqtt
            if mqtt_config.enabled:
                jpg_bytes = obj.get_jpg_bytes(
                    timestamp=mqtt_config.timestamp,
                    bounding_box=mqtt_config.bounding_box,
                    crop=mqtt_config.crop,
                    height=mqtt_config.height)
                self.client.publish(
                    f"{self.topic_prefix}/{camera}/{obj.obj_data['label']}/snapshot",
                    jpg_bytes,
                    retain=True)

        def object_status(camera, object_name, status):
            self.client.publish(f"{self.topic_prefix}/{camera}/{object_name}",
                                status,
                                retain=False)

        for camera in self.config.cameras.keys():
            camera_state = CameraState(camera, self.config, self.frame_manager)
            camera_state.on('start', start)
            camera_state.on('update', update)
            camera_state.on('end', end)
            camera_state.on('snapshot', snapshot)
            camera_state.on('object_status', object_status)
            self.camera_states[camera] = camera_state

        # {
        #   'zone_name': {
        #       'person': {
        #           'camera_1': 2,
        #           'camera_2': 1
        #       }
        #   }
        # }
        self.zone_data = defaultdict(lambda: defaultdict(lambda: {}))
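
The nested defaultdict above lets zone counts be written without first creating the zone or label keys. A minimal standalone sketch (camera and zone names are hypothetical) that reproduces the commented structure:

from collections import defaultdict

# zone -> object label -> {camera: count}, same shape as self.zone_data above
zone_data = defaultdict(lambda: defaultdict(lambda: {}))

# hypothetical updates; no KeyError even though 'yard'/'person' were never initialized
zone_data['yard']['person']['camera_1'] = 2
zone_data['yard']['person']['camera_2'] = 1

print(zone_data['yard']['person'])  # {'camera_1': 2, 'camera_2': 1}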
コード例 #44
0
ファイル: ovs_controller.py プロジェクト: orca-project/hoen
    def __init__(self, *args, **kwargs):
        super(ovs_ctl, self).__init__(*args, **kwargs)
        self.mac_to_port = {}
        self.switches = {}

        self.dpid_to_name = {
            # Original environment -- uncomment the 3 lines below
            #  95536754289: 'h00',
            #  95535344413: 'h01',
            #  95542363502: 'h02'
            # ORCA first demo -- uncomment the 3 lines below
            #  95534111059: 'h00',
            #  95538556217: 'h01',
            #  95533205304: 'h02'
            # Virtual Machine SONAr -- uncomment the 5 lines below
            #  95532435104: 's01',
            #  95533179799: 's02',
            #  95532162947: 's03',
            #  95539282496: 's04',
            #  95533558180: 's05'
            # ORCA second demo -- uncomment the 5 lines below:
            #  95532594594: 's01',
            #  95534454058: 's02',
            #  95536781980: 's03',
            #  95531791552: 's04',
            #  47102661227: 's05'
            #  95532050795: 's05'
            # ORCA Final Demo
            int('000000163ea46de1', 16): 's01',
            int('000000163e1d4d1f', 16): 's02',
            int('0000000af789926b', 16): 's03',
            int('000000163e784ab7', 16): 's04'
        }

        self.topology = defaultdict(dict)
        self.topology['s01']['s02'] = 1
        self.topology['s01']['s04'] = 2
        self.topology['s02']['s03'] = 1
        self.topology['s02']['s01'] = 2
        self.topology['s03']['s04'] = 1
        self.topology['s03']['s02'] = 2
        self.topology['s04']['s01'] = 1
        self.topology['s04']['s03'] = 2

        self.speed = defaultdict(dict)
        self.speed['s01']['s02'] = 1000
        self.speed['s01']['s04'] = 1000
        self.speed['s02']['s03'] = 1000
        self.speed['s02']['s01'] = 1000
        self.speed['s03']['s04'] = 1000
        self.speed['s03']['s02'] = 1000
        self.speed['s04']['s01'] = 1000
        self.speed['s04']['s03'] = 1000

        self.ports = {}
        self.arp_disabled_ports = self.ports_to_disable()
        self.control = {}

        self.waiters = {}

        #  Instantiate the OVS SDR Controller
        self.ovs_controller_thread = ovs_controller(
            name='OVS',
            req_header='ovs_req', # Don't modify
            rep_header='ovs_rep', # Don't modify
            create_msg='ovc_crs',
            request_msg='ovc_rrs',
            update_msg='ovc_urs',
            delete_msg='ovc_drs',
            topology_msg='ovc_trs',
            host=kwargs.get('host', '0.0.0.0'),
            port=kwargs.get('port', 3200),
            ovs=self
        )

        # Start the OVS SDR Controller Server
        self.ovs_controller_hub = hub.spawn(self.ovs_controller_thread.run)

        self.count = len(self.topology)
        self.switch_config_count = {}
        self.single = {}
        self.st = time.time()
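
A note on the `defaultdict(dict)` pattern used for `topology` and `speed` above: the inner mapping is created on first access, so links can be registered in any order. A small sketch with invented switch names:

from collections import defaultdict

topology = defaultdict(dict)
topology['s01']['s02'] = 1   # inner dict for 's01' is created on demand
topology['s02']['s01'] = 2

print(topology['s01'])    # {'s02': 1}
print('s09' in topology)  # False -- membership tests do not auto-create keys
print(topology['s09'])    # {} -- but a plain lookup does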
コード例 #45
0
 def __init__(self, items=None):
     self._collection = defaultdict(list)
     self._add_items_impl(self._collection, take_with_default(items, []))
コード例 #46
0
ファイル: legislators.py プロジェクト: annerajb/openstates
    def scrape(self, chamber, term):
        urls = {'lower': "http://www.msa.md.gov/msa/mdmanual/06hse/html/hseal.html",
                'upper': "http://www.msa.md.gov/msa/mdmanual/05sen/html/senal.html"}
        detail_re = re.compile(r'\((R|D)\), (?:Senate President, )?(?:House Speaker, )?District (\w+)')

        with self.urlopen(urls[chamber]) as html:
            doc = lxml.html.fromstring(html)

            # rest of data on this page is <li>s that have anchor tags
            for a in doc.cssselect('li a'):
                link = a.get('href')
                # tags don't close so we get the <li> and <a> content and diff them
                name_text = a.text_content()
                detail_text = a.getparent().text_content().replace(name_text, '')

                # ignore if it is not a valid link
                if link:
                    # handle names
                    names = name_text.split(',')
                    last_name = names[0]
                    first_name = names[1].strip()
                    # TODO: try to trim first name to remove middle initial
                    if len(names) > 2:
                        suffixes = names[2]
                    else:
                        suffixes = ''

                    # handle details
                    details = detail_text.strip()
                    party, district = detail_re.match(details).groups()
                    party = PARTY_DICT[party]

                    leg_url = BASE_URL+link

                    leg = Legislator(term, chamber, district,
                                     ' '.join((first_name, last_name)),
                                     first_name, last_name,
                                     party=party, suffixes=suffixes,
                                     url=leg_url)
                    leg.add_source(url=leg_url)

                    with self.urlopen(leg_url) as leg_html:
                        leg_doc = lxml.html.fromstring(leg_html)
                        img_src = leg_doc.xpath('//img[@align="left"]/@src')
                        if img_src:
                            leg['photo_url'] = BASE_URL + img_src[0]

                        # address extraction
                        # this is pretty terrible, we get address in a format that looks
                        # like:
                        #   James Senate Office Building, Room 322
                        #   11 Bladen St., Annapolis, MD 21401
                        #   (410) 841-3565, (301) 858-3565; 1-800-492-7122, ext. 3565 (toll free)
                        #   e-mail: [email protected]
                        #   fax: (410) 841-3552, (301) 858-3552
                        #
                        #   Western Maryland Railway Station, 13 Canal St., Room 304, Cumberland, MD 21502
                        #   (301) 722-4780; 1-866-430-9553 (toll free)
                        #   e-mail: [email protected]
                        #   fax: (301) 722-4790
                        # usually first ul, sometimes first p
                        try:
                            addr_lines = leg_doc.xpath('//ul')[0].text_content().strip().splitlines()
                        except IndexError:
                            addr_lines = leg_doc.xpath('//p')[0].text_content().strip().splitlines()
                        addr_pieces = {'capitol': defaultdict(str),
                                       'district': defaultdict(str)}
                        addr_type = 'capitol'
                        for line in addr_lines:
                            if '(410)' in line or '(301)' in line:  # MD area codes
                                addr_pieces[addr_type]['phone'] = line
                            elif 'toll free' in line:
                                pass  # skip stand-alone 1-800 numbers
                            elif 'e-mail' in line:
                                addr_pieces[addr_type]['email'] = line.replace(
                                    'e-mail: ', '')
                            elif 'fax' in line:
                                addr_pieces[addr_type]['fax'] = line.replace('fax: ', '')
                            elif line == '':
                                addr_type = 'district'
                            else:
                                addr_pieces[addr_type]['address'] += '{0}\n'.format(line)
                        if addr_pieces['capitol']:
                            leg.add_office('capitol', 'Capitol Office',
                                           **addr_pieces['capitol'])
                            leg['email'] = (addr_pieces['capitol']['email'] or
                                            addr_pieces['district']['email'] or
                                            None)
                        if addr_pieces['district']:
                            leg.add_office('district', 'District Office',
                                           **addr_pieces['district'])

                    self.save_legislator(leg)
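
The `defaultdict(str)` entries above let the scraper append address lines to fields that may not exist yet, since a missing key starts as the empty string. A standalone sketch with made-up input lines in the layout shown in the comment:

from collections import defaultdict

addr_pieces = {'capitol': defaultdict(str), 'district': defaultdict(str)}
addr_type = 'capitol'
for line in ['James Senate Office Building, Room 322',
             '11 Bladen St., Annapolis, MD 21401',
             '(410) 841-3565']:
    if '(410)' in line or '(301)' in line:
        addr_pieces[addr_type]['phone'] = line
    else:
        # missing 'address' key starts as '' thanks to defaultdict(str)
        addr_pieces[addr_type]['address'] += '{0}\n'.format(line)

print(addr_pieces['capitol']['phone'])  # (410) 841-3565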
コード例 #47
0
# -*- coding: utf-8 -*-
__author__ = 'du'
__blog__ = 'www.cnblogs.com/anmutu;www.zmfei4.com;'
__date__ = '2020/1/6 2:01'

from collections import defaultdict
from datetime import datetime

# The memoized search below is actually the most efficient approach.

total = defaultdict(int)
total_memory = defaultdict(int)


def fib_recursion(k):
    assert k > 0, "k的值必须大于0"
    if k in [1, 2]:
        return 1
    else:
        global total
        total[k] += 1
        return fib_recursion(k - 2) + fib_recursion(k - 1)


# Memoized version: before recursing, check whether fib(k) is already cached in
# total_memory; if so return the cached value, otherwise compute it and store it.
def fib_recursion_memory(k):
    assert k > 0, "k must be greater than 0"
    if k in [1, 2]:
        return 1
    global total_memory
    if k in total_memory:
        return total_memory[k]
    value = fib_recursion_memory(k - 2) + fib_recursion_memory(k - 1)
    total_memory[k] = value
    return value
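
A quick comparison of the two versions (illustrative, not part of the original file): both return the same Fibonacci number, but `total` records how often the plain recursion revisits each subproblem, while `total_memory` caches each result exactly once.

if __name__ == '__main__':
    print(fib_recursion(10))         # 55; total[k] counts repeated subproblem calls
    print(fib_recursion_memory(10))  # 55; total_memory maps k to the cached fib(k)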
コード例 #48
0
ファイル: diff.py プロジェクト: BlackstoneMr/astropy
    def __init__(self, a, b, ignore_keywords=[], ignore_comments=[],
                 tolerance=0.0, ignore_blanks=True, ignore_blank_cards=True):
        """
        See `FITSDiff` for explanations of the initialization parameters.
        """

        self.ignore_keywords = set(k.upper() for k in ignore_keywords)
        self.ignore_comments = set(k.upper() for k in ignore_comments)

        self.tolerance = tolerance
        self.ignore_blanks = ignore_blanks
        self.ignore_blank_cards = ignore_blank_cards

        self.ignore_keyword_patterns = set()
        self.ignore_comment_patterns = set()
        for keyword in list(self.ignore_keywords):
            keyword = keyword.upper()
            if keyword != '*' and glob.has_magic(keyword):
                self.ignore_keywords.remove(keyword)
                self.ignore_keyword_patterns.add(keyword)
        for keyword in list(self.ignore_comments):
            keyword = keyword.upper()
            if keyword != '*' and glob.has_magic(keyword):
                self.ignore_comments.remove(keyword)
                self.ignore_comment_patterns.add(keyword)

        # Keywords appearing in each header
        self.common_keywords = []

        # Set to the number of keywords in each header if the counts differ
        self.diff_keyword_count = ()

        # Set if the keywords common to each header (excluding ignore_keywords)
        # appear in different positions within the header
        # TODO: Implement this
        self.diff_keyword_positions = ()

        # Keywords unique to each header (excluding keywords in
        # ignore_keywords)
        self.diff_keywords = ()

        # Keywords that have different numbers of duplicates in each header
        # (excluding keywords in ignore_keywords)
        self.diff_duplicate_keywords = {}

        # Keywords common to each header but having different values (excluding
        # keywords in ignore_keywords)
        self.diff_keyword_values = defaultdict(lambda: [])

        # Keywords common to each header but having different comments
        # (excluding keywords in ignore_keywords or in ignore_comments)
        self.diff_keyword_comments = defaultdict(lambda: [])

        if isinstance(a, string_types):
            a = Header.fromstring(a)
        if isinstance(b, string_types):
            b = Header.fromstring(b)

        if not (isinstance(a, Header) and isinstance(b, Header)):
            raise TypeError('HeaderDiff can only diff astropy.io.fits.Header '
                            'objects or strings containing FITS headers.')

        super(HeaderDiff, self).__init__(a, b)
コード例 #49
0
ファイル: ifs.py プロジェクト: mon/ifstools
    def extract(self,
                progress=True,
                recurse=True,
                tex_only=False,
                extract_manifest=False,
                path=None,
                rename_dupes=False,
                **kwargs):
        if path is None:
            path = self.folder_out
        if tex_only:
            kwargs['use_cache'] = False
        utils.mkdir_silent(path)
        utime(path, (self.time, self.time))

        if extract_manifest and self.manifest and not tex_only:
            with open(join(path, 'ifs_manifest.xml'), 'wb') as f:
                f.write(self.manifest.to_text().encode('utf8'))

        # build the tree
        for folder in self.tree.all_folders:
            if tex_only and folder.name == 'tex':
                self.tree = folder
                # make it root to discourage repacking
                folder.name = ''
                for f in folder.all_files:
                    f.path = ''
                break
            elif tex_only:
                continue
            f_path = join(path, folder.full_path)
            utils.mkdir_silent(f_path)
            utime(f_path, (self.time, self.time))

            # handle different-case-but-same-name for Windows
            same_name = defaultdict(list)
            for name, obj in folder.files.items():
                same_name[name.lower()].append(obj)

            for files in same_name.values():
                # common base case of "sane ifs file"
                if len(files) == 1:
                    continue

                # make them 'a (1)', 'a (2)' etc
                if rename_dupes:
                    for i, f in enumerate(files[1:]):
                        base, ext = splitext(f.name)
                        f.name = base + ' ({})'.format(i + 1) + ext
                elif progress:  # warn if not silenced
                    all_names = ', '.join([f.name for f in files])
                    tqdm.write(
                        'WARNING: Files with same name and differing case will overwrite on Windows ({}). '
                        .format(all_names) +
                        'Use --rename-dupes to extract without loss')
                # else just do nothing

        # extract the files
        for f in tqdm(self.tree.all_files, disable=not progress):
            # allow recurse + tex_only to extract ifs files
            if tex_only and not isinstance(f, ImageFile) and not isinstance(
                    f, ImageCanvas) and not (recurse
                                             and f.name.endswith('.ifs')):
                continue
            f.extract(path, **kwargs)
            if progress:
                tqdm.write(f.full_path)
            if recurse and f.name.endswith('.ifs'):
                rpath = join(path, f.full_path)
                i = IFS(rpath)
                i.extract(progress=progress,
                          recurse=recurse,
                          tex_only=tex_only,
                          extract_manifest=extract_manifest,
                          path=rpath.replace('.ifs', '_ifs'),
                          rename_dupes=rename_dupes,
                          **kwargs)

        # you can't pickle open files, so this won't work. Perhaps there is a way around it?
        '''to_extract = (f for f in self.tree.all_files if not(tex_only and not isinstance(f, ImageFile) and not isinstance(f, ImageCanvas)))
コード例 #50
0
def _get_empty_index():
    return defaultdict(set)
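
A hedged usage sketch for the helper above: `defaultdict(set)` is the usual shape for an inverted index, mapping each token to the set of document ids that contain it (the documents here are invented):

from collections import defaultdict

def _get_empty_index():
    return defaultdict(set)

index = _get_empty_index()
for doc_id, text in [(1, 'red green'), (2, 'green blue')]:
    for token in text.split():
        index[token].add(doc_id)  # an unseen token starts as an empty set()

print(index['green'])  # {1, 2}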
コード例 #51
0
ファイル: 4.py プロジェクト: ZMbiubiubiu/For_Test
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019-07-10 16:02
# @Author  : bingo
# @Site    : 
# @File    : 4.py
# @Software: PyCharm

"""
Count how many times each word occurs in any English plain-text file.
"""

from collections import defaultdict
import re

d = defaultdict(int)
# regular expression that matches words
PATTERN = re.compile('[a-zA-Z]+')


def get_word_from_file(file: str) -> 'generator':
    with open(file, 'r') as f:
        for line in f:
            line = line.strip()
            results = PATTERN.finditer(line)
            for word in results:
                yield word.group().lower()


for word in get_word_from_file('4_articles.txt'):
    d[word] += 1
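
For this word-count pattern the standard library also offers `collections.Counter`, which behaves like a `defaultdict(int)` with counting helpers built in. A sketch of the equivalent (the file name is reused from above and is hypothetical here):

from collections import Counter
import re

PATTERN = re.compile('[a-zA-Z]+')


def count_words(path: str) -> Counter:
    counts = Counter()
    with open(path, 'r') as f:
        for line in f:
            counts.update(m.group().lower() for m in PATTERN.finditer(line))
    return counts

# counts = count_words('4_articles.txt')
# counts.most_common(10) would list the ten most frequent words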
コード例 #52
0
 def __init__(self, vertices):
     self.V = vertices
     self.graph = defaultdict(list)
     self.degree = [0] * vertices
コード例 #53
0
def tree():
    return defaultdict(tree)
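
The one-liner above builds an "autovivifying" nested dictionary: any chain of keys can be assigned in one go, and the intermediate levels appear automatically. A small usage sketch:

from collections import defaultdict
import json


def tree():
    return defaultdict(tree)


t = tree()
t['users']['alice']['role'] = 'admin'  # intermediate dicts are created on demand
print(json.dumps(t))                   # {"users": {"alice": {"role": "admin"}}}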
コード例 #54
0
def get_data(input_filename, start_date=None, end_date=None):
    casos = read_cases(input_filename, order_by="date")
    dates = sorted(set(c.date for c in casos))
    start_date = start_date or dates[0]
    end_date = end_date or dates[-1]
    caso_by_key = defaultdict(list)
    for caso in casos:
        caso_by_key[row_key(caso)].append(caso)
    for place_cases in caso_by_key.values():
        place_cases.sort(key=lambda row: row.date, reverse=True)

    order_key = attrgetter("order_for_place")
    last_case_for_place = {}
    order_for_place = Counter()
    for date in date_range(start_date, end_date + datetime.timedelta(days=1), "daily"):
        for place_key in demographics.place_keys():
            place_type, state, city = place_key
            place_cases = caso_by_key[place_key]
            valid_place_cases = sorted(
                [item for item in place_cases if item.date <= date], key=order_key, reverse=True,
            )
            if not valid_place_cases:
                # There are no cases for this city for this date - skip
                continue

            # This place has at least one case for this date (or before),
            # so use the newest one.
            last_valid_case = valid_place_cases[0]
            newest_case = place_cases[0]
            is_last = date == last_valid_case.date == newest_case.date
            order_for_place[place_key] += 1
            new_case = {
                "city": city,
                "city_ibge_code": last_valid_case.city_ibge_code,
                "date": date,
                "epidemiological_week": epidemiological_week(date),
                "estimated_population": last_valid_case.estimated_population,
                "estimated_population_2019": last_valid_case.estimated_population_2019,
                "is_last": is_last,
                "is_repeated": last_valid_case.date != date,
                "last_available_confirmed": last_valid_case.confirmed,
                "last_available_confirmed_per_100k_inhabitants": last_valid_case.confirmed_per_100k_inhabitants,
                "last_available_date": last_valid_case.date,
                "last_available_death_rate": last_valid_case.death_rate,
                "last_available_deaths": last_valid_case.deaths,
                "order_for_place": order_for_place[place_key],
                "place_type": place_type,
                "state": state,
            }

            last_case = last_case_for_place.get(place_key, None)
            if last_case is None:
                new_confirmed = new_case["last_available_confirmed"]
                new_deaths = new_case["last_available_deaths"]
            else:
                new_confirmed = new_case["last_available_confirmed"] - last_case["last_available_confirmed"]
                new_deaths = new_case["last_available_deaths"] - last_case["last_available_deaths"]
            new_case["new_confirmed"] = new_confirmed
            new_case["new_deaths"] = new_deaths
            last_case_for_place[place_key] = new_case

            yield new_case
コード例 #55
0
    def processAlgorithm(self, parameters, context, feedback):
        source = self.parameterAsSource(parameters, self.INPUT, context)
        if source is None:
            raise QgsProcessingException(
                self.invalidSourceError(parameters, self.INPUT))

        value_field_name = self.parameterAsString(parameters,
                                                  self.VALUES_FIELD_NAME,
                                                  context)
        category_field_names = self.parameterAsFields(
            parameters, self.CATEGORIES_FIELD_NAME, context)

        value_field_index = source.fields().lookupField(value_field_name)
        if value_field_index >= 0:
            value_field = source.fields().at(value_field_index)
        else:
            value_field = None
        category_field_indexes = [
            source.fields().lookupField(n) for n in category_field_names
        ]

        # generate output fields
        fields = QgsFields()
        for c in category_field_indexes:
            fields.append(source.fields().at(c))

        def addField(name):
            """
            Adds a field to the output, keeping the same data type as the value_field
            """
            field = QgsField(value_field)
            field.setName(name)
            fields.append(field)

        if value_field is None:
            field_type = 'none'
            fields.append(QgsField('count', QVariant.Int))
        elif value_field.isNumeric():
            field_type = 'numeric'
            fields.append(QgsField('count', QVariant.Int))
            fields.append(QgsField('unique', QVariant.Int))
            fields.append(QgsField('min', QVariant.Double))
            fields.append(QgsField('max', QVariant.Double))
            fields.append(QgsField('range', QVariant.Double))
            fields.append(QgsField('sum', QVariant.Double))
            fields.append(QgsField('mean', QVariant.Double))
            fields.append(QgsField('median', QVariant.Double))
            fields.append(QgsField('stddev', QVariant.Double))
            fields.append(QgsField('minority', QVariant.Double))
            fields.append(QgsField('majority', QVariant.Double))
            fields.append(QgsField('q1', QVariant.Double))
            fields.append(QgsField('q3', QVariant.Double))
            fields.append(QgsField('iqr', QVariant.Double))
        elif value_field.type() in (QVariant.Date, QVariant.Time,
                                    QVariant.DateTime):
            field_type = 'datetime'
            fields.append(QgsField('count', QVariant.Int))
            fields.append(QgsField('unique', QVariant.Int))
            fields.append(QgsField('empty', QVariant.Int))
            fields.append(QgsField('filled', QVariant.Int))
            # keep same data type for these fields
            addField('min')
            addField('max')
        else:
            field_type = 'string'
            fields.append(QgsField('count', QVariant.Int))
            fields.append(QgsField('unique', QVariant.Int))
            fields.append(QgsField('empty', QVariant.Int))
            fields.append(QgsField('filled', QVariant.Int))
            # keep same data type for these fields
            addField('min')
            addField('max')
            fields.append(QgsField('min_length', QVariant.Int))
            fields.append(QgsField('max_length', QVariant.Int))
            fields.append(QgsField('mean_length', QVariant.Double))

        request = QgsFeatureRequest().setFlags(QgsFeatureRequest.NoGeometry)
        if value_field is not None:
            attrs = [value_field_index]
        else:
            attrs = []
        attrs.extend(category_field_indexes)
        request.setSubsetOfAttributes(attrs)
        features = source.getFeatures(
            request, QgsProcessingFeatureSource.FlagSkipGeometryValidityChecks)
        total = 50.0 / source.featureCount() if source.featureCount() else 0
        if field_type == 'none':
            values = defaultdict(lambda: 0)
        else:
            values = defaultdict(list)
        for current, feat in enumerate(features):
            if feedback.isCanceled():
                break

            feedback.setProgress(int(current * total))
            attrs = feat.attributes()
            cat = tuple([attrs[c] for c in category_field_indexes])
            if field_type == 'none':
                values[cat] += 1
                continue
            if field_type == 'numeric':
                if attrs[value_field_index] == NULL:
                    continue
                else:
                    value = float(attrs[value_field_index])
            elif field_type == 'string':
                if attrs[value_field_index] == NULL:
                    value = ''
                else:
                    value = str(attrs[value_field_index])
            elif attrs[value_field_index] == NULL:
                value = NULL
            else:
                value = attrs[value_field_index]
            values[cat].append(value)

        (sink, dest_id) = self.parameterAsSink(parameters, self.OUTPUT,
                                               context, fields,
                                               QgsWkbTypes.NoGeometry,
                                               QgsCoordinateReferenceSystem())
        if sink is None:
            raise QgsProcessingException(
                self.invalidSinkError(parameters, self.OUTPUT))

        if field_type == 'none':
            self.saveCounts(values, sink, feedback)
        elif field_type == 'numeric':
            self.calcNumericStats(values, sink, feedback)
        elif field_type == 'datetime':
            self.calcDateTimeStats(values, sink, feedback)
        else:
            self.calcStringStats(values, sink, feedback)

        return {self.OUTPUT: dest_id}
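
The two `defaultdict` variants chosen above cover both modes of the algorithm: counting features per category when no value field is selected, and collecting raw values per category otherwise. A stripped-down sketch of the same grouping pattern with invented data:

from collections import defaultdict

rows = [('residential', 120.0), ('residential', 80.5), ('industrial', 300.0)]

counts = defaultdict(lambda: 0)  # corresponds to field_type == 'none'
values = defaultdict(list)       # corresponds to every other field type
for category, value in rows:
    counts[category] += 1
    values[category].append(value)

print(counts['residential'])  # 2
print(values['industrial'])   # [300.0]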
コード例 #56
0
ファイル: core.py プロジェクト: LexMachinaInc/fold_to_ascii
# -*- coding: utf-8 -*-
from collections import defaultdict

from . import mapping


def none_factory():
    return None


default_translate_table = defaultdict(none_factory, mapping.translate_table)


def fold(unicode_string, replacement=u''):
    """Fold unicode_string to ASCII.

Unmapped characters should be replaced with empty string by default, or other
replacement if provided.

All astral plane characters are always removed, even if a replacement is
provided.
    """

    if unicode_string is None:
        return u''

    if not isinstance(unicode_string, str):
        raise TypeError('unicode_string must be a str')

    if not isinstance(replacement, str):
        raise TypeError('replacement must be a str')
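
Presumably the table above is consumed with `str.translate`: the keys are code points, the values are ASCII replacements, and `none_factory` makes every unmapped code point resolve to `None`, which `str.translate` deletes. A hedged usage sketch, separate from the truncated `fold()` above (the exact contents of `mapping.translate_table` are assumed):

# str.translate looks characters up by code point; unmapped ones resolve to
# None via none_factory and are therefore dropped
folded = u'Crème brûlée'.translate(default_translate_table)
# with a complete Latin-1 mapping this would yield u'Creme brulee'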
コード例 #57
0
ファイル: raiden_service.py プロジェクト: valkwarble/raiden
    def __init__(self, chain, default_registry, private_key_bin, transport, discovery, config):
        if not isinstance(private_key_bin, bytes) or len(private_key_bin) != 32:
            raise ValueError('invalid private_key')

        invalid_timeout = (
            config['settle_timeout'] < NETTINGCHANNEL_SETTLE_TIMEOUT_MIN or
            config['settle_timeout'] > NETTINGCHANNEL_SETTLE_TIMEOUT_MAX
        )
        if invalid_timeout:
            raise ValueError('settle_timeout must be in range [{}, {}]'.format(
                NETTINGCHANNEL_SETTLE_TIMEOUT_MIN, NETTINGCHANNEL_SETTLE_TIMEOUT_MAX
            ))

        self.token_to_channelgraph = dict()
        self.tokens_to_connectionmanagers = dict()
        self.manager_to_token = dict()
        self.swapkey_to_tokenswap = dict()
        self.swapkey_to_greenlettask = dict()

        self.identifier_to_statemanagers = defaultdict(list)
        self.identifier_to_results = defaultdict(list)

        # This is a map from a hashlock to a list of channels, the same
        # hashlock can be used in more than one token (for tokenswaps), a
        # channel should be removed from this list only when the lock is
        # released/withdrawn but not when the secret is registered.
        self.token_to_hashlock_to_channels = defaultdict(lambda: defaultdict(list))

        self.chain = chain
        self.default_registry = default_registry
        self.config = config
        self.privkey = private_key_bin
        self.address = privatekey_to_address(private_key_bin)

        endpoint_registration_event = gevent.spawn(
            discovery.register,
            self.address,
            config['external_ip'],
            config['external_port'],
        )
        endpoint_registration_event.link_exception(endpoint_registry_exception_handler)

        self.private_key = PrivateKey(private_key_bin)
        self.pubkey = self.private_key.public_key.format(compressed=False)
        self.protocol = RaidenProtocol(
            transport,
            discovery,
            self,
            config['protocol']['retry_interval'],
            config['protocol']['retries_before_backoff'],
            config['protocol']['nat_keepalive_retries'],
            config['protocol']['nat_keepalive_timeout'],
            config['protocol']['nat_invitation_timeout'],
        )

        # TODO: remove this cyclic dependency
        transport.protocol = self.protocol

        self.message_handler = RaidenMessageHandler(self)
        self.state_machine_event_handler = StateMachineEventHandler(self)
        self.blockchain_events = BlockchainEvents()
        self.greenlet_task_dispatcher = GreenletTasksDispatcher()
        self.on_message = self.message_handler.on_message
        self.alarm = AlarmTask(chain)
        self.shutdown_timeout = config['shutdown_timeout']
        self._block_number = None

        self.transaction_log = StateChangeLog(
            storage_instance=StateChangeLogSQLiteBackend(
                database_path=config['database_path']
            )
        )

        if config['database_path'] != ':memory:':
            self.database_dir = os.path.dirname(config['database_path'])
            self.lock_file = os.path.join(self.database_dir, '.lock')
            self.snapshot_dir = os.path.join(self.database_dir, 'snapshots')
            self.serialization_file = os.path.join(self.snapshot_dir, 'data.pickle')

            if not os.path.exists(self.snapshot_dir):
                os.makedirs(self.snapshot_dir)

            # Prevent concurrent access to the same db
            self.db_lock = filelock.FileLock(self.lock_file)
        else:
            self.database_dir = None
            self.lock_file = None
            self.snapshot_dir = None
            self.serialization_file = None
            self.db_lock = None

        # If the endpoint registration fails the node will quit, this must
        # finish before starting the protocol
        endpoint_registration_event.join()

        self.start()
コード例 #58
0
def main():
    is_title = False
    last = defaultdict(int)
    while (True):
        con = None
        try:
            con = mdb.connect('10.75.26.127', 'root', '123456', 'firehose')
            con2 = mdb.connect('10.75.26.127', 'root', '123456', 'monitor')
            cur = con.cursor()
            cur2 = con2.cursor()
            values = {}
            values['lines'] = defaultdict(int)
            values['lines']['part num'] = defaultdict(int)
            cur_time = int(time.time())
            cur_time = cur_time - cur_time % 300
            for i in range(7):
                cur.execute(
                    "select `today_sent` from `session` where username=3855001400 and partnum=%d"
                    % i)
                rows = cur.fetchall()
                if len(rows) <= 0:
                    print "can not select from sql"
                row = rows[0]
                acc_value = row[0]
                if (acc_value < last[i]):
                    last[i] = 1
                values['lines']['part num']["part_%d" %
                                            i] = acc_value - last[i]
                values['lines']['total'] += acc_value - last[i]
                last[i] = acc_value
            cur2.execute(
                "insert into `kpis` (`appname`,`timestamp`,`kpi_value`) values('app-firehose', %d, '%s')"
                % (cur_time, json.dumps(values)))
            print json.dumps(values)
            if not is_title:
                title = tree()
                for kpi_name in values:
                    for dim_name in values[kpi_name]:
                        if (dim_name != "total"):
                            title[kpi_name][dim_name] = values[kpi_name][
                                dim_name].keys()
                        else:
                            title[kpi_name]["NONE"] = []
                if len(title) <= 0:
                    continue
                title["data_path"] = 'mysql'
                title_json = json.dumps(title)

                cur2.execute(
                    "select `appname` from `title` where `appname`='%s'" %
                    'app-firehose')
                rows = cur2.fetchall()
                if (len(rows) <= 0):
                    cur2.execute(
                        "insert into `title`(`appname`, `kpi_json`) values('%s', '%s\')"
                        % ('app-firehose', title_json))
                else:
                    cur2.execute(
                        "update `title` set `kpi_json` = '%s' where `appname`='%s'"
                        % (title_json, 'app-firehose'))
            con.commit()
            con2.commit()
            time.sleep(300)
        except mdb.Error, e:
            print "Error %d: %s" % (e.args[0], e.args[1])
        finally:
            # close the MySQL connection if it was opened (assumed cleanup)
            if con:
                con.close()
コード例 #59
0
 def build_graph(self):
     logging.info("building graph")
     self.graph = {
         "nodes": [],
         "links": [],
         "terms": [],
         "people": [],
         "documents": []
     }
     global_clusters_index = {}
     index = 0
     for time in range(self.num_time_slides):
         cluster_weight_given_time = np.zeros(self.num_global_clusters)
         document_count = 0.
         for y in self.time_slides[time]:
             document_count += len(self.document_list_given_time[y])
         document_count /= len(self.time_slides[time])
         for i, cluster in enumerate(self.global_clusters[time]):
             for c in cluster:
                 for w in self.local_clusters[time][c]:
                     cluster_weight_given_time[
                         i] += self.term_freq_given_time[time][w]
         cluster_weight_sum_given_time = sum(cluster_weight_given_time)
         if cluster_weight_sum_given_time == 0:
             cluster_weight_sum_given_time = 1
         for i, cluster in enumerate(self.global_clusters[time]):
             terms = []
             for c in cluster:
                 for w in self.local_clusters[time][c]:
                     terms.append(w)
             if len(terms) == 0:
                 continue
             sorted_terms = sorted(terms,
                                   key=lambda t: self.term_freq[t],
                                   reverse=True)
             sorted_terms_given_time = sorted(
                 terms,
                 key=lambda t: self.term_freq_given_time[time][t],
                 reverse=True)
             self.graph["nodes"].append({
                 "key": [{
                     "term": self.term_list[k],
                     "w": int(self.term_freq_given_time[time][k])
                 } for k in sorted_terms_given_time],
                 "name":
                 self.term_list[sorted_terms_given_time[0]],
                 "pos":
                 time,
                 "w":
                 cluster_weight_given_time[i] /
                 cluster_weight_sum_given_time * (document_count + 1),
                 "n":
                 cluster_weight_given_time[i] /
                 cluster_weight_sum_given_time,
                 "cluster":
                 i
             })
             global_clusters_index[str(time) + "-" + str(i)] = index
             index += 1
      # calculate similarity
     global_clusters_sim_target = defaultdict(dict)
     global_clusters_sim_source = defaultdict(dict)
     for time in range(1, self.num_time_slides):
         for i1, c1 in enumerate(self.global_clusters[time]):
             key1 = str(time) + "-" + str(i1)
             if global_clusters_index.has_key(key1):
                 terms1 = []
                 for c in c1:
                     for w in self.local_clusters[time][c]:
                         terms1.append(w)
                 for i2, c2 in enumerate(self.global_clusters[time - 1]):
                     key2 = str(time - 1) + "-" + str(i2)
                     if global_clusters_index.has_key(key2):
                         terms2 = []
                         for c in c2:
                             for w in self.local_clusters[time][c]:
                                 terms2.append(w)
                         sim = common_word_with_weight(
                             terms1, terms2, self.term_freq)
                         if sim > 0:
                             global_clusters_sim_target[key1][key2] = sim
                             global_clusters_sim_source[key2][key1] = sim
         #for i, c in enumerate(self.global_clusters[time]):
         #    key1 = str(time)+"-"+str(i)
         #    key2 = str(time-1)+"-"+str(i)
         #    if global_clusters_index.has_key(key1) and global_clusters_index.has_key(key2):
         #        global_clusters_sim_target[key1][key2] = 1.
         #        global_clusters_sim_source[key2][key1] = 1.
     for key1 in global_clusters_sim_target:
         if global_clusters_index.has_key(key1):
             m1 = sum(global_clusters_sim_target[key1].values())
             for key2 in global_clusters_sim_target[key1]:
                 if global_clusters_index.has_key(key2):
                     m2 = sum(global_clusters_sim_source[key2].values())
                     self.graph["links"].append({
                         "source":
                         int(global_clusters_index[key2]),
                         "target":
                         int(global_clusters_index[key1]),
                         "w1":
                         global_clusters_sim_target[key1][key2] / float(m1),
                         "w2":
                         global_clusters_sim_target[key1][key2] / float(m2)
                     })
      # term frequency
     sorted_terms = sorted(self.term_list,
                           key=lambda t: self.term_freq[self.term_index[t]],
                           reverse=True)
     for t in sorted_terms:
         term_index = self.term_index[t]
         term_year = defaultdict(list)
         for d in self.reverse_term_dict[term_index]:
             term_year[self.document_list[d].stat[0].value].append(d)
         sorted_term_year = sorted(term_year.items(), key=lambda t: t[0])
         if len(sorted_term_year) == 0:
             continue
         ty = {}
         for i in range(self.start_time + 1, self.end_time):
             ty[i] = 0.0
         for c in term_year:
             ty[c] = len(term_year[c])
         start_point = sorted_term_year[0][0]
         start_time = self.get_time_slide(start_point)
         start_cluster = self.global_cluster_labels[start_time][
             self.local_cluster_labels[start_time][term_index]]
         start_node = global_clusters_index[str(start_time) + "-" +
                                            str(start_cluster)]
         item = {
             "t":
             t,
             "idx":
             int(term_index),
             "freq":
             int(self.term_freq[term_index]),
             "dist": [0 for i in range(self.num_time_slides)],
             "year": [{
                 "y": j,
                 "d": ty[j]
             } for j in ty],
             "cluster": [0 for i in range(self.num_time_slides)],
             "node": [0 for i in range(self.num_time_slides)],
             "doc": [int(d) for d in self.reverse_term_dict[term_index]],
             "first": [{
                 "p": p,
                 "y": self.term_first_given_person[term_index][p]
             } for p in self.term_first_given_person[term_index]],
             "start": {
                 "year": int(start_point),
                 "time": int(start_time),
                 "cluster": int(start_cluster),
                 "node": int(start_node)
             }
         }
         for time in range(self.num_time_slides):
             item["dist"][time] = int(
                 self.term_freq_given_time[time][term_index])
             local_c = self.local_cluster_labels[time][term_index]
             item["cluster"][time] = int(
                 self.global_cluster_labels[time][local_c])
             item["node"][time] = int(
                 global_clusters_index[str(time) + "-" +
                                       str(item["cluster"][time])])
         self.graph["terms"].append(item)
     #people
     for author in self.author_result:
         self.graph["people"].append({
             "id": author.id,
             "name": author.title,
             #"hindex": author.h_index,
             #"pub_count": author.pub_count,
             #"cite": author.citation_no
         })
     #document
     for i, doc in enumerate(self.document_list):
         self.graph["documents"].append({
             "idx": i,
             "id": int(doc.id),
             "title": doc.title,
             "year": int(doc.stat[0].value
                         ),  #"jconf":doc.jconf_name, #"abs":doc.abs,
             #"cite":int(doc.stat[2].value)
         })  #, "authors":doc.author_ids, "topic":doc.topic})
     #time slides
     self.graph["time_slides"] = self.time_slides
     return self.graph
コード例 #60
0
ファイル: correct.py プロジェクト: quheng/HaSearch
def train(features):
    model = collections.defaultdict(lambda: 1)
    for f in features:
        model[f] += 1
    return model
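
This `train` helper (as in the classic spelling-corrector recipe) gives every unseen word a pseudo-count of 1 via `defaultdict(lambda: 1)`, i.e. add-one smoothing, so lookups for unknown words never return zero. A hedged usage sketch with a made-up corpus:

import collections


def train(features):
    model = collections.defaultdict(lambda: 1)
    for f in features:
        model[f] += 1
    return model


NWORDS = train('the cat sat on the mat'.split())  # hypothetical corpus
print(NWORDS['the'])     # 3  (default 1 + two occurrences)
print(NWORDS['unseen'])  # 1  (smoothed default, never zero)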