Example #1
    def muster_basis_options(self):
        text = ""
        lowername = self.method.lower()
        options = defaultdict(lambda: defaultdict(dict))

        options["BASIS"]["ORBITAL"]["value"] = self.basis

        if self.method in ["ccsd(t)-f12-optri"]:
            if self.basis == "cc-pvdz-f12":
                options["BASIS"]["JKFIT"]["value"] = "aug-cc-pvtz/jkfit"
                options["BASIS"]["JKFITC"]["value"] = self.basis + "/optri"
                options["BASIS"]["MP2FIT"]["value"] = "aug-cc-pvtz/mp2fit"
        elif (
            ("df-" in self.method)
            or ("f12" in self.method)
            or (self.method in ["mp2c", "dft-sapt", "dft-sapt-pbe0acalda"])
        ):
            if self.unaugbasis and self.auxbasis:
                options["BASIS"]["JKFIT"]["value"] = self.auxbasis + "/jkfit"
                options["BASIS"]["JKFITB"]["value"] = self.unaugbasis + "/jkfit"
                options["BASIS"]["MP2FIT"]["value"] = self.auxbasis + "/mp2fit"
                options["BASIS"]["DFLHF"]["value"] = self.auxbasis + "/jkfit"
            else:
                raise ValidationError("""Auxiliary basis not predictable from orbital basis '%s'""" % (self.basis))
        return text, options
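
A standalone sketch of the nested defaultdict(lambda: defaultdict(dict)) pattern used above, with illustrative values rather than the project's own (the outer and middle levels are created on first access, so the method can assign three levels deep without any setup):

from collections import defaultdict

options = defaultdict(lambda: defaultdict(dict))
options["BASIS"]["ORBITAL"]["value"] = "cc-pvdz"   # both intermediate levels spring into existence
print(options["BASIS"]["ORBITAL"])                 # {'value': 'cc-pvdz'}
print("JKFIT" in options["BASIS"])                 # False -- unassigned keys are still absent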
Example #2
    def default_scheduling_algorithm(self):
        """
        Decide whether we need to schedule our own triggers (if at all) in
        order to progress to the next mode.

        This algorithm has been tested against the following simulators:
            Icarus Verilog
        """
        if not self._terminate and self._writes:

            if self._mode == Scheduler._MODE_NORMAL:
                if not self._readwrite.primed:
                    self._readwrite.prime(self.react)
            elif not self._next_timestep.primed:
                self._next_timestep.prime(self.react)

        elif self._terminate:
            if _debug:
                self.log.debug("Test terminating, scheduling Timer")

            for t in self._trigger2coros:
                t.unprime()

            for t in [self._readwrite, self._readonly, self._next_timestep,
                      self._timer1, self._timer0]:
                if t.primed:
                    t.unprime()

            self._timer1.prime(self.begin_test)
            self._trigger2coros = collections.defaultdict(list)
            self._coro2triggers = collections.defaultdict(list)
            self._terminate = False
            self._mode = Scheduler._MODE_TERM
Example #3
def getTrainingContextData():
    
    training_data = OrderedDict()
    
    #Initialising the xml parser for the training and test set
    training_root = initializeXMLParser(dir_path+training_file) 
    
    #Grabbing one word type at a time
    for word_type_xml in training_root:
        word_type = word_type_xml.attrib['item']
        training_data[word_type] = defaultdict(lambda: defaultdict(dict))
        
        #Grabbing the instance id and its list of senses
        for word_instance in word_type_xml:
            instance = word_instance.attrib['id']
            senses   = [answer.attrib['senseid'] for answer in word_instance.findall('answer')]
            pre_context  = word_instance.find('context').text.split()
            post_context = word_instance.find('context').find('head').tail.split()
            
            #Pre-processing the pre-context and post context
            #TODO: Check why this is reducing the accuracy of the model by 1%
            pre_context = preProcessContextData(pre_context)
            post_context = preProcessContextData(post_context)
            
            training_data[word_type]['training'][instance] = {"Sense":senses, "Pre-Context":pre_context, "Post-Context":post_context }
        
        #break;#TODO: Remove this breakpoint. Only testing for one word type right now
    return training_data
Example #4
  def __init__(self, max_n):
    """
    max_n must be greater than or equal to 2.
    """
    self._max_n = max_n

    # Maps {n: {ngram_prefix: word_counts}}
    #    ngram_prefix is a tuple of words.
    #    word_counts is a Counter of word to count.
    self._ngram_word_counts_map = {}

    # Used to calculate the continuation counts.
    # For each n, maps a word to a set of ngram_prefix that precede it.
    # Maps {n: {word: set(ngram_prefix)}}
    self._continuations_map = {}

    # Used to normalize continuation counts into a probability.
    # Maps {n: set(ngram)}
    self._ngrams_map = {}

    # Maps {order: discount}
    # TODO(dounanshi): calculate discount http://www.riacs.edu/research/technical_reports/TR_pdf/TR_00.07.pdf
    self._discount_map = {1: .75, 2: .75, 3: .75}

    # Initialize maps.
    for i in range(max_n):
      n = i + 1
      self._ngram_word_counts_map[n] = defaultdict(Counter)
      self._continuations_map[n] = defaultdict(set)
      self._ngrams_map[n] = set()

    # Maps {ngram_prefix: count}
    self._prefix_count_cache = {}
    # Maps {ngram_prefix: (n1, n2, n3)}
    self._nvals_cache = {}
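
The per-order maps above are updated in lockstep; a self-contained sketch of one update for a single trigram (the class's real update method is not shown here, so the variable names below are illustrative):

from collections import defaultdict, Counter

ngram_word_counts = defaultdict(Counter)   # {ngram_prefix: Counter mapping word -> count}
continuations = defaultdict(set)           # {word: set of prefixes that precede it}
ngrams = set()

prefix, word = ("the", "black"), "cat"
ngram_word_counts[prefix][word] += 1       # Counter supplies the missing 0
continuations[word].add(prefix)
ngrams.add(prefix + (word,))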
Example #5
def APMTracker(replay):
    """
    Builds ``player.aps`` and ``player.apm`` dictionaries where an action is
    any Selection, Hotkey, or Ability event.

    Also provides ``player.avg_apm`` which is defined as the sum of all the
    above actions divided by the number of seconds played by the player (not
    necessarily the whole game) multiplied by 60.
    """
    for player in replay.players:
        player.aps = defaultdict(int)
        player.apm = defaultdict(int)
        player.seconds_played = replay.length.seconds

        for event in player.events:
            if event.name == 'SelectionEvent' or 'AbilityEvent' in event.name or 'ControlGroup' in event.name:
                player.aps[event.second] += 1
                player.apm[int(event.second/60)] += 1

            elif event.name == 'PlayerLeaveEvent':
                player.seconds_played = event.second

        if len(player.apm) > 0:
            player.avg_apm = sum(player.aps.values())/float(player.seconds_played)*60
        else:
            player.avg_apm = 0

    return replay
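
As a quick arithmetic check of the docstring's definition (toy numbers, not taken from a real replay): a player with 300 qualifying events over 600 seconds played gets 300 / 600 * 60 = 30 average APM.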
Example #6
 def worker_list(self, include_running=True, **kwargs):
     self.prune()
     workers = [
         dict(
             name=worker.id,
             last_active=worker.last_active,
             started=getattr(worker, 'started', None),
             **worker.info
         ) for worker in self._state.get_active_workers()]
     workers.sort(key=lambda worker: worker['started'], reverse=True)
     if include_running:
         running = collections.defaultdict(dict)
         num_pending = collections.defaultdict(int)
         num_uniques = collections.defaultdict(int)
         for task in self._state.get_pending_tasks():
             if task.status == RUNNING and task.worker_running:
                 running[task.worker_running][task.id] = self._serialize_task(task.id, False)
             elif task.status == PENDING:
                 for worker in task.workers:
                     num_pending[worker] += 1
                 if len(task.workers) == 1:
                     num_uniques[list(task.workers)[0]] += 1
         for worker in workers:
             tasks = running[worker['name']]
             worker['num_running'] = len(tasks)
             worker['num_pending'] = num_pending[worker['name']]
             worker['num_uniques'] = num_uniques[worker['name']]
             worker['running'] = tasks
     return workers
Example #7
    def prune_features(self, clser, min_pos_feature_count, min_neg_feature_count, verbose=False):
        if verbose:
            print 'Pruning the features'
            print

        features_counts = defaultdict(int)
        for feat in self.classifiers_features[clser]:
            for f in feat:
                features_counts[f] += 1

        if verbose:
            print "  Number of features: ", len(features_counts)


        features_counts = defaultdict(lambda: [0, 0])
        for feat, output in zip(self.classifiers_features[clser], self.classifiers_outputs[clser]):
            output = 0 if output < 0.5 else 1

            for f in feat:
                features_counts[f][output] += 1

        remove_features = []
        for f in features_counts:
            negative, positive = features_counts[f]

            if positive >= min_pos_feature_count + len(f):
                # keep it
                continue


            if negative >= min_neg_feature_count + len(f):
                # keep it
                continue

            # remove the feature since it does not meet the criteria
            remove_features.append(f)

        if verbose:
            print "  Number of features occurring less then %d positive times and %d negative times: %d" % \
                  (min_pos_feature_count, min_neg_feature_count, len(remove_features))

        remove_features = set(remove_features)
        for feat in self.classifiers_features[clser]:
            feat.prune(remove_features)


        # count the features again and report the result
        features_counts = defaultdict(int)
        for feat in self.classifiers_features[clser]:
            for f in feat:
                features_counts[f] += 1

        self.classifiers_features_list[clser] = features_counts.keys()

        self.classifiers_features_mapping[clser] = {}
        for i, f in enumerate(self.classifiers_features_list[clser]):
            self.classifiers_features_mapping[clser][f] = i

        if verbose:
            print "  Number of features after pruning: ", len(features_counts)
Example #8
def _Symbolize(input):
  asan_libs = _FindASanLibraries()
  libraries = collections.defaultdict(list)
  asan_lines = []
  for asan_log_line in [a.strip() for a in input]:
    m = _ParseAsanLogLine(asan_log_line)
    if m:
      libraries[m['library']].append(m)
    asan_lines.append({'raw_log': asan_log_line, 'parsed': m})

  all_symbols = collections.defaultdict(dict)
  original_symbols_dir = symbol.SYMBOLS_DIR
  for library, items in libraries.iteritems():
    libname = _TranslateLibPath(library, asan_libs)
    lib_relative_addrs = set([i['rel_address'] for i in items])
    info_dict = symbol.SymbolInformationForSet(libname,
                                               lib_relative_addrs,
                                               True)
    if info_dict:
      all_symbols[library]['symbols'] = info_dict

  for asan_log_line in asan_lines:
    m = asan_log_line['parsed']
    if not m:
      print asan_log_line['raw_log']
      continue
    if (m['library'] in all_symbols and
        m['rel_address'] in all_symbols[m['library']]['symbols']):
      s = all_symbols[m['library']]['symbols'][m['rel_address']][0]
      print s[0], s[1], s[2]
    else:
      print asan_log_line['raw_log']
Example #9
 def edit_quantiles(self,q=.01,quantile_range=False,v=False,write=True):
     basic.log('creating edit quantiles %s' % self.lang)
     f_out = basic.create_dir('results/quantiles')
     df = pd.read_csv(self.db_path)
     df = self.drop_dups(df)
     df.page_id = df.page_id.astype(int)
     if self.drop1:
         df = df.loc[(df['len'] > 1)]
     q = np.arange(q,1+q,q)
     results = defaultdict(dict)
     for n in self.namespace:
         results[n] = defaultdict(dict)
         for r in self.revert:
             basic.log('%s %s %s' % (self.lang,n,r))
             if n == 'at':
                 result = df[r].quantile(q=q)
                 mean = df[r].mean()
             else:
                 result = df.loc[(df['namespace'] == self.namespace.index(n)),r].quantile(q=q)
                 #qcut = pd.qcut(df.loc[(df['namespace'] == self.namespace.index(n)),r],q)
                 #print(qcut)
                 mean = df.loc[(df['namespace'] == self.namespace.index(n)),r].mean()
             result = result.to_frame()
             column = '%s_%s_%s' % (self.lang,n,r)
             result.columns = [column]
             results[n][r] = {'quantiles':result,'mean':mean}
             if write:
                 result = result.append(DataFrame({column:result.loc[(result[column] < int(mean+1))].tail(1).index.values},index=['mean_quantile']))
                 result = result.append(DataFrame({column:mean},index=['mean_value']))
                 result.to_csv('%s/%s_%s_%s.csv' % (f_out,self.lang,n,r),encoding='utf-8',index_label='quantiles')
     return results
Example #10
    def to_dict(self, default=None):
        """
        Converts sequence of (Key, Value) pairs to a dictionary.

        >>> type(seq([('a', 1)]).to_dict())
        dict

        >>> seq([('a', 1), ('b', 2)]).to_dict()
        {'a': 1, 'b': 2}

        :param default: Can be a callable zero argument function. When not None, the returned
            dictionary is a collections.defaultdict with default as value for missing keys. If the
            value is not callable, then a zero argument lambda function is created returning the
            value and used for collections.defaultdict
        :return: dictionary from sequence of (Key, Value) elements
        """
        dictionary = {}
        for e in self.sequence:
            dictionary[e[0]] = e[1]
        if default is None:
            return dictionary
        else:
            if hasattr(default, '__call__'):
                return collections.defaultdict(default, dictionary)
            else:
                return collections.defaultdict(lambda: default, dictionary)
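
A short usage sketch of the default parameter described in the docstring (assuming seq is imported from the surrounding library; results shown as comments):

d = seq([('a', 1), ('b', 2)]).to_dict(default=int)
d['a']        # 1
d['missing']  # 0 -- collections.defaultdict(int) supplies it
d = seq([('a', 1)]).to_dict(default=100)
d['missing']  # 100 -- a non-callable default is wrapped in a zero-argument lambda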
Example #11
    def __init__(self, ldg=None):
        DependencyGraph.__init__(self)
        self.nodes = defaultdict(lambda: {'address': None,
                                          'ldg': 0,
                                          'gid': 1, # has the same value as the gid of the nodes in ldg.
                                          'lemma': None,
                                          'head': None,
                                          'deps': defaultdict(int),
                                          'remaining_ops': defaultdict(list), #list(LgGraph.operator_dic.keys()),
                                          'ctag': None,
                                          'tag': None,
                                          'feats': None,
                                          })
        self.git_list = [1]
        self.nodes[0].update(
                        {'address': 0,
                         'head': -1,
                         'ldg': 'TOP',
                         'gid': 1, # has the same value as the gid of the nodes in ldg.
                         'remaining_ops': defaultdict(list),
                         }
                    )
        if isinstance(ldg, LgGraph):
            self.nodes[0]['ldg'] = ldg

        if isinstance(ldg, GraphNet):
            self.nodes = ldg
            self.git_list = ldg.get_git_list()
Example #12
  def __init__(self, k, messages):
    self.k = k
    # user_id -> (phrase -> [next words])
    self.m = defaultdict(lambda : defaultdict(list))

    for message in messages:
      self.read_message(message)
Example #13
  def __init__(self, names, messages):
    self.names = names
    self.messages = messages
    # do some preanalysis

    # MBU: user_id -> [message]
    self.messages_by_user = defaultdict(list)

    # who has liked {{user}}'s messages?
    # user_id -> (liker -> count)
    self.likes_per_user = defaultdict(lambda : defaultdict(int))

    # who has {{user}} liked?
    # user_id -> (liked -> count)
    self.user_likes = defaultdict(lambda : defaultdict(int))

    # which words are used most often?
    # word -> (user_id -> count)
    self.most_common_words = defaultdict(lambda : defaultdict(int))

    # per user, which words are used most often?
    # user_id -> (word -> count)
    self.mcw_per_user = defaultdict(lambda : defaultdict(int))

    # which users have liked their own posts?
    # user_id -> count
    self.self_likes = defaultdict(int)

    for message in messages:
      self.read_message(message)
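
read_message is not shown in this example; one plausible shape for it, assuming each message is a dict with user_id, text, and favorited_by fields (these field names are assumptions, not taken from the source):

  def read_message(self, message):
    # Hypothetical sketch of how the defaultdicts above could be filled.
    sender = message['user_id']
    self.messages_by_user[sender].append(message)
    for liker in message.get('favorited_by', []):
      self.likes_per_user[sender][liker] += 1
      self.user_likes[liker][sender] += 1
      if liker == sender:
        self.self_likes[sender] += 1
    for word in (message.get('text') or '').split():
      self.most_common_words[word][sender] += 1
      self.mcw_per_user[sender][word] += 1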
Example #14
def show_connections():
	user=User.query.filter_by(id=session['user_id']).first()
	if 'linkedin_token' in session:
		conns = linkedin.get('people/~/connections:(headline,id,first-name,last-name,location,industry,picture-url)')

	f = open('data.json', 'w')
	f.write(json.dumps(conns.data, indent=1))
	f.close()
	connections = json.loads(json.dumps(conns.data, indent=1))
    # Get an id for a connection. We'll just pick the first one.
	print len(connections['values'])
	index = 0
	all = list()
	categorized = defaultdict(list)
	countdata = defaultdict()
    
	for conn in connections['values']:
		try:
            #all.append()
			name = conn['firstName'].encode("utf-8")+' '+conn['lastName'].encode("utf-8")
			industry = conn['industry'].encode("utf-8")
			headline = conn['headline'].encode("utf-8")
			contact = (name, industry, headline)
            #conn['firstName'].encode("utf-8"), conn['lastName'].encode("utf-8"), conn['id'].encode("utf-8"), , conn['picture-url'].encode("utf-8"), conn['location'].encode("utf-8")
			all.append(contact)
			categorized[industry].append(contact)
		except KeyError: pass
		index = index+1

	for key in categorized:
		if len(categorized[key])*1000/index > 10 :
			countdata[key] = len(categorized[key])
			print key,countdata[key]
	return render_template("connections.html", title = 'Connections', all_conn=all, cat_conn=categorized, cat_count = countdata, user=user)
Example #15
  def __init__(self, analysis_files):
    # The analysis files we gather information from.
    self.analysis_files = analysis_files

    # Map from scala source files to the class files generated from that source
    self.products = defaultdict(set)

    # Map from scala sources to jar files they depend on. (And, rarely, class files.)
    self.binary_deps = defaultdict(set)

    # Map from scala sources to the source files providing the classes that they depend on
    # The set of source files here does *not* appear to include inheritance!
    # eg, in src/jvm/com/foursquare/api/util/BUILD:util,
    # in the source file ClientMetrics, class ClientView extends PrettyEnumeration, but
    # the file declaring PrettyEnumeration is *not* in the source deps.
    # But PrettyEnumeration *is* included in the list of classes in external_deps.
    self.source_deps = defaultdict(set)

    # Map from scala sources to the classes that they depend on. (Not class files or source files, just class names.)
    self.external_deps = defaultdict(set)

    # Map from scala sources to the classes that they provide. (Again, not class files but fully-qualified class names.)
    self.class_names = defaultdict(set)

    for c in self.analysis_files:
      self.parse(c)
Example #16
    def search_all(self, text):
        candidates = defaultdict(float)

        for ngram in text_to_ngrams(text, self.size):
            matches = self.ngrams.get(ngram, None)
            if not matches:
                continue
            total = matches["total"]
            for name, value in list(matches["name"].items()):
                candidates[name] += float(value) / total

        def score():
            return {
                "value": 0.0,
                }

        high_scores = defaultdict(score)

        for name, value in list(candidates.items()):
            row = self.by_name.get(name, None)
            key = row["iso2"]
            if row["sub"]:
                key += ">"
            if value > high_scores[key]["value"]:
                high_scores[key] = {
                    "iso2": row["iso2"],
                    "value": value,
                    "name": name,
                    "sub": row["sub"],
                }

        high_scores = sorted(high_scores.values(),
                             key=lambda x: x["value"], reverse=True)

        return high_scores
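
defaultdict accepts any zero-argument callable as its factory, so the score helper above hands every unseen key a fresh {"value": 0.0} dict; a minimal sketch of that behavior:

from collections import defaultdict

def score():
    return {"value": 0.0}

high_scores = defaultdict(score)
high_scores["US"]["value"] += 0.4   # "US" is created via score() on first access
print(high_scores["US"])            # {'value': 0.4}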
Example #17
    def invoke(self, dirname, filenames=set(), linter_configs=set()):
        """
        Main entrypoint for all plugins.

        Returns results in the format of:

        {'filename': {
          'line_number': [
            'error1',
            'error2'
            ]
          }
        }

        """
        retval = defaultdict(lambda: defaultdict(list))
        extensions = ' -o '.join(['-name "*%s"' % ext for ext in
                                  self.get_file_extensions()])

        cmd = 'find %s %s | xargs %s' % (
            dirname, extensions, self.get_command(
                dirname,
                linter_configs=linter_configs))
        result = self.executor(cmd)
        for line in result.split('\n'):
            output = self.process_line(dirname, line)
            if output is not None:
                filename, lineno, messages = output
                if filename.startswith(dirname):
                    filename = filename[len(dirname) + 1:]
                retval[filename][lineno].append(messages)
        return retval
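
The defaultdict(lambda: defaultdict(list)) is what produces the {'filename': {line_number: [errors]}} shape promised in the docstring; a minimal sketch with made-up linter output:

from collections import defaultdict

retval = defaultdict(lambda: defaultdict(list))
retval["app/models.py"][42].append("unused import")   # both levels are auto-created
retval["app/models.py"][42].append("line too long")
# {'app/models.py': {42: ['unused import', 'line too long']}}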
Example #18
    def generate_te_doping(self, d):
        types = ['p', 'n']
        target = 'seebeck_doping'  # root key for getting all temps, etc

        pf_dict = defaultdict(lambda: defaultdict(int))
        zt_dict = defaultdict(lambda: defaultdict(int))

        for type in types:
            for t in d[target][type]:  # temperatures
                outside_pf_array = []
                outside_zt_array = []
                for didx, tensor in enumerate(d[target][type][t]):  # doping idx
                    inside_pf_array = []
                    inside_zt_array = []
                    for tidx, val in enumerate(tensor):
                            seebeck = d['seebeck_doping'][type][t][didx][tidx]
                            cond = d['cond_doping'][type][t][didx][tidx]
                            kappa = d['kappa_doping'][type][t][didx][tidx]
                            inside_pf_array.append(seebeck*seebeck*cond)
                            inside_zt_array.append(seebeck*seebeck*cond*t/kappa)
                    outside_pf_array.append(inside_pf_array)
                    outside_zt_array.append(inside_zt_array)

                pf_dict[type][t] = outside_pf_array
                zt_dict[type][t] = outside_zt_array

        return pf_dict, zt_dict
Example #19
    def extract_classifiers(self, das, utterances, verbose=False):
        # process the training data
        self.utterances = utterances
        self.das = das

        self.utterances_list = self.utterances.keys()

        self.utterance_fvc = {}
        self.das_abstracted = {}
        self.das_category_labels = {}
        for utt_idx in self.utterances_list:
            self.utterances[utt_idx] = self.preprocessing.normalise(self.utterances[utt_idx])
            self.utterance_fvc[utt_idx] = self.get_fvc(self.utterances[utt_idx])
            self.das_abstracted[utt_idx], self.das_category_labels[utt_idx] = \
                self.get_abstract_da(self.das[utt_idx],self.utterance_fvc[utt_idx])

        # get the classifiers
        self.classifiers = defaultdict(int)

        for k in self.utterances_list:
            for dai in self.das_abstracted[k].dais:
                self.classifiers[unicode(dai)] += 1

                if verbose:
                    if dai.value and 'CL_' not in dai.value:
                        print '=' * 120
                        print 'Un-abstracted category label value'
                        print '-' * 120
                        print unicode(self.utterances[k])
                        print unicode(self.utterance_fvc[k])
                        print unicode(self.das[k])
                        print unicode(self.das_abstracted[k])
Example #20
def metadata_catalog(fits_filenames):
    "Histogram the metadata values in list of fits files."
    
    common, optional = metadata_field_use(fits_filenames)
    allfields = optional.union(common)
    histo = collections.defaultdict(int)
    values = collections.defaultdict(set)
    for fname in fits_filenames:
        hdulist = pyfits.open(fname)
        hdr = hdulist[0].header
        for field in allfields:
            if field in hdr:
                histo[field] += 1
                values[field].add(str(hdr[field]))
        hdulist.close()

    print('\n', '~'*78)
    print('Histogram of field use:')
    pprint(histo)

    print('\n', '~'*78)
    
    max_unique = 0.80
    print('Values used (max %s unique values):'%(max_unique))
    #! pprint(values)
    for k,v in values.items():
        if float(len(v))/len(fits_filenames) > max_unique: continue
        print('%8s: %s'%(k,', '.join(v)))
Example #21
    def _getavailablepackfiles(self):
        """For each pack file (a index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = defaultdict(lambda: 0)
        mtimes = defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (os.path.join(self.path, id), max(mtimes[id]),
                            sizes[id])
                    else:
                        ids.add(id)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise
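
Note that defaultdict(lambda: 0) and defaultdict(lambda: []) behave exactly like defaultdict(int) and defaultdict(list); a tiny sketch with made-up pack names:

from collections import defaultdict

sizes = defaultdict(lambda: 0)     # same effect as defaultdict(int)
mtimes = defaultdict(lambda: [])   # same effect as defaultdict(list)
sizes["pack-1"] += 1024
mtimes["pack-1"].append(1700000000.0)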
Example #22
def add2Tree(tree,id,name,floor,type):
    type = 'Type-'+type
    for i, btype in enumerate(tree['item']):
        if btype['id'] == type:
            ntype = btype
            break
    else:
        ntype = defaultdict()
        ntype['text'] = type
        ntype['id'] = type
        ntype['item'] = []
        tree['item'].append(ntype)
    floor = 'Floor-' + floor
    floorid = type + floor
    for i, bfloor in enumerate(ntype['item']):
        if bfloor['id'] == floorid:
            nfloor = bfloor
            break
    else:
        nfloor = defaultdict()
        nfloor['text'] = floor
        nfloor['id'] = floorid
        nfloor['item'] = []
        ntype['item'].append(nfloor)

    leaf = defaultdict()
    leaf['id']= str(id) + '-' + name
    leaf['text']= '.'.join(name.split('.')[3:])
    nfloor['item'].append(leaf)
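
A small caution about the pattern above: defaultdict() called with no arguments leaves default_factory as None, so it behaves like a plain dict and missing keys still raise KeyError; here it is simply used as a dict replacement. A minimal sketch:

from collections import defaultdict

leaf = defaultdict()            # default_factory is None
leaf['id'] = '7-sensor.name'    # illustrative value
# leaf['missing']               # would raise KeyError, exactly like a plain dict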
Example #23
def run_merge(filenames):
    """Merges all Skype databases to a new database."""
    dbs = [skypedata.SkypeDatabase(f) for f in filenames]
    db_base = dbs.pop()
    counts = collections.defaultdict(lambda: collections.defaultdict(int))
    postbacks = Queue.Queue()
    postfunc = lambda r: postbacks.put(r)
    worker = workers.MergeThread(postfunc)

    name, ext = os.path.splitext(os.path.split(db_base.filename)[-1])
    now = datetime.datetime.now().strftime("%Y%m%d")
    filename_final = util.unique_path("%s.merged.%s%s" %  (name, now, ext))
    print("Creating %s, using %s as base." % (filename_final, db_base))
    shutil.copyfile(db_base.filename, filename_final)
    db2 = skypedata.SkypeDatabase(filename_final)
    chats2 = db2.get_conversations()
    db2.get_conversations_stats(chats2)

    for db1 in dbs:
        chats = db1.get_conversations()
        db1.get_conversations_stats(chats)
        bar_total = sum(c["message_count"] for c in chats)
        bar_text = " Processing %.*s.." % (30, db1)
        bar = ProgressBar(max=bar_total, afterword=bar_text)
        bar.start()
        args = {"db1": db1, "db2": db2, "chats": chats,
                "type": "diff_merge_left"}
        worker.work(args)
        while True:
            result = postbacks.get()
            if "error" in result:
                print("Error merging %s:\n\n%s" % (db1, result["error"]))
                worker = None # Signal for global break
                break # break while True
            if "done" in result:
                break # break while True
            if "diff" in result:
                counts[db1]["chats"] += 1
                counts[db1]["msgs"] += len(result["diff"]["messages"])
                msgcounts = sum(c["message_count"] for c in result["chats"])
                bar.update(bar.value + msgcounts)
            if result["output"]:
                log(result["output"])
        if not worker:
            break # break for db1 in dbs
        bar.stop()
        bar.afterword = " Processed %s." % db1
        bar.update(bar_total)
        print

    if not counts:
        print("Nothing new to merge.")
        db2.close()
        os.unlink(filename_final)
    else:
        for db1 in dbs:
            print("Merged %s in %s from %s." %
                  (util.plural("message", counts[db1]["msgs"]),
                   util.plural("chat", counts[db1]["chats"]), db1))
        print("Merge into %s complete." % db2)
Example #24
def hierarchical(keys):
    """
    Iterates over dimension values in keys, taking two sets
    of dimension values at a time to determine whether two
    consecutive dimensions have a one-to-many relationship.
    If they do, a mapping between the first and second dimension
    values is recorded. Returns a list of n-1 mappings, one for each
    pair of consecutive dimensions.
    """
    ndims = len(keys[0])
    if ndims <= 1:
        return True
    dim_vals = list(zip(*keys))
    combinations = (zip(*dim_vals[i : i + 2]) for i in range(ndims - 1))
    hierarchies = []
    for combination in combinations:
        hierarchy = True
        store1 = defaultdict(list)
        store2 = defaultdict(list)
        for v1, v2 in combination:
            if v2 not in store2[v1]:
                store2[v1].append(v2)
            previous = store1[v2]
            if previous and previous[0] != v1:
                hierarchy = False
                break
            if v1 not in store1[v2]:
                store1[v2].append(v1)
        hierarchies.append(store2 if hierarchy else {})
    return hierarchies
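
A small worked example of the behavior described in the docstring (toy keys, not from the library's tests): the first dimension maps one-to-many onto the second, so the mapping is returned; if some second-dimension value appeared under two different first-dimension values, that slot would be {} instead.

keys = [('A', 'a1'), ('A', 'a2'), ('B', 'b1')]
hierarchical(keys)
# [defaultdict(<class 'list'>, {'A': ['a1', 'a2'], 'B': ['b1']})]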
Example #25
    def as_coefficients_dict(a):
        """Return a dictionary mapping terms to their Rational coefficient.
        Since the dictionary is a defaultdict, inquiries about terms which
        were not present will return a coefficient of 0. If an expression is
        not an Add it is considered to have a single term.

        Examples
        ========

        >>> from sympy.abc import a, x
        >>> (3*x + a*x + 4).as_coefficients_dict()
        {1: 4, x: 3, a*x: 1}
        >>> _[a]
        0
        >>> (3*a*x).as_coefficients_dict()
        {a*x: 3}
        """

        d = defaultdict(list)
        for ai in a.args:
            c, m = ai.as_coeff_Mul()
            d[m].append(c)
        for k, v in d.iteritems():
            if len(v) == 1:
                d[k] = v[0]
            else:
                d[k] = Add(*v)
        di = defaultdict(int)
        di.update(d)
        return di
Example #26
    def findSubstring(self, s, words):
        """
        :type s: str
        :type words: List[str]
        :rtype: List[int]
        """
        result, m, n, k = [], len(s), len(words), len(words[0])
        if m < n*k:
            return result

        lookup = collections.defaultdict(int)
        for i in words:
            lookup[i] += 1                            # Space: O(n * k)

        for i in xrange(m+1-k*n):                     # Time: O(m)
            cur, j = collections.defaultdict(int), 0
            while j < n:                              # Time: O(n)
                word = s[i+j*k:i+j*k+k]               # Time: O(k)
                if word not in lookup:
                    break
                cur[word] += 1
                if cur[word] > lookup[word]:
                    break
                j += 1
            if j == n:
                result.append(i)

        return result
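
A quick usage check on the classic input for this problem (assuming the method lives on the usual Solution wrapper class): the concatenations "barfoo" and "foobar" start at indices 0 and 9.

Solution().findSubstring("barfoothefoobarman", ["foo", "bar"])   # [0, 9]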
Example #27
def bench_b(power_list):

    n_samples, n_features = 1000, 10000
    data_params = {'n_samples': n_samples, 'n_features': n_features,
                   'tail_strength': .7, 'random_state': random_state}
    dataset_name = "low rank matrix %d x %d" % (n_samples, n_features)
    ranks = [10, 50, 100]

    if enable_spectral_norm:
        all_spectral = defaultdict(list)
    all_frobenius = defaultdict(list)
    for rank in ranks:
        X = make_low_rank_matrix(effective_rank=rank, **data_params)
        if enable_spectral_norm:
            X_spectral_norm = norm_diff(X, norm=2, msg=False)
        X_fro_norm = norm_diff(X, norm='fro', msg=False)

        for n_comp in [np.int(rank/2), rank, rank*2]:
            label = "rank=%d, n_comp=%d" % (rank, n_comp)
            print(label)
            for pi in power_list:
                U, s, V, _ = svd_timing(X, n_comp, n_iter=pi, n_oversamples=2,
                                        power_iteration_normalizer='LU')
                if enable_spectral_norm:
                    A = U.dot(np.diag(s).dot(V))
                    all_spectral[label].append(norm_diff(X - A, norm=2) /
                                               X_spectral_norm)
                f = scalable_frobenius_norm_discrepancy(X, U, s, V)
                all_frobenius[label].append(f / X_fro_norm)

    if enable_spectral_norm:
        title = "%s: spectral norm diff vs n power iteration" % (dataset_name)
        plot_power_iter_vs_s(power_list, all_spectral, title)
    title = "%s: frobenius norm diff vs n power iteration" % (dataset_name)
    plot_power_iter_vs_s(power_list, all_frobenius, title)
Example #28
    def reload(self, data):
        self.data = data
        self.data = [row for row in self.data if row["lang"] == self.lang]

        self.by_name = {}
        for row in self.data:
            self.by_name[row["name"]] = {
                "iso2":row["iso2"],
                "sub":row["sub"],
                }

        def ngram_dict():
            return {
                "name": defaultdict(float),
                "total": 0.0
                }

        names = defaultdict(list)
        for row in self.data:
            iso2 = row["iso2"]
            name = row["name"]
            names[iso2].append(name)

        self.ngrams = defaultdict(ngram_dict)
        for iso2, name_list in list(names.items()):
            weight = 1.0 / len(name_list)
            for name in name_list:
                for ngram in text_to_ngrams(name, self.size):
                    self.ngrams[ngram]["name"][name] += weight
                    self.ngrams[ngram]["total"] += weight
Example #29
 def get_context_data(self, **kwargs):
     context = super(BugmailStatsView, self).get_context_data(**kwargs)
     json_stats = cache.get(self.cache_key)
     if not json_stats:
         wks_ago = (now() - timedelta(days=14)).date()
         stats = BugmailStat.objects.stats_for_range(wks_ago)
         stats_dict = {
             BugmailStat.TOTAL: defaultdict(int),
             BugmailStat.USED: defaultdict(int),
         }
         for s in stats:
             stats_dict[s.stat_type][date_to_js(s.date)] += s.count
         all_stats = {
             'total': [],
             'used': [],
             'x_axis': [],
         }
         stats_total = stats_dict[BugmailStat.TOTAL]
         stats_used = stats_dict[BugmailStat.USED]
         for d in date_range(wks_ago):
             d = date_to_js(d)
             all_stats['x_axis'].append(d)
             all_stats['total'].append([d, stats_total[d]])
             all_stats['used'].append([d, stats_used[d]])
         json_stats = json.dumps(all_stats)
         cache.set(self.cache_key, json_stats, 1800)  # 30 minutes
     context['stats'] = json_stats
     return context
Example #30
    def __init__(self):

        self.log = SimLog("cocotb.scheduler")
        if _debug:
            self.log.setLevel(logging.DEBUG)

        # A dictionary of pending coroutines for each trigger,
        # indexed by trigger
        self._trigger2coros = collections.defaultdict(list)

        # A dictionary of pending triggers for each coroutine, indexed by coro
        self._coro2triggers = collections.defaultdict(list)

        # Our main state
        self._mode = Scheduler._MODE_NORMAL

        # A dictionary of pending writes
        self._writes = {}

        self._pending_coros = []
        self._pending_callbacks = []
        self._pending_triggers = []
        self._pending_threads = []
        self._pending_events = []   # Events we need to call set on once we've unwound

        self._terminate = False
        self._test_result = None
        self._entrypoint = None
        self._main_thread = threading.current_thread()

        # Select the appropriate scheduling algorithm for this simulator
        self.advance = self.default_scheduling_algorithm
        self._is_reacting = False