Example #1
def gen_cls_sk(sk_dir, cls):
  mtds = collect_decls(cls, "mtds")
  flds = collect_decls(cls, "flds")
  s_flds = filter(op.attrgetter("is_static"), flds)
  if cls.is_class:
    if not mtds and not s_flds: return None
  else: # cls.is_itf or cls.is_enum
    if not s_flds: return None

  cname = util.sanitize_ty(cls.name)

  buf = cStringIO.StringIO()
  buf.write("package {};\n".format(cname))
  buf.write(_const)

  # static fields
  buf.write('\n'.join(map(trans_fld, s_flds)))
  if len(s_flds) > 0: buf.write('\n')

  # migrating static fields' initialization to <clinit>
  for fld in ifilter(op.attrgetter("init"), s_flds):
    if not fld.init.has_call and not fld.init.has_str and not fld.is_aliasing: continue
    # retrieve (or declare) <clinit>
    clinit = fld.clazz.get_or_declare_clinit()
    if clinit not in mtds: mtds.append(clinit)
    # add assignment
    assign = st.gen_S_assign(exp.gen_E_id(fld.name), fld.init)
    clinit.body.append(assign)

  # accessors for static fields
  for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
    fname = fld.name
    accessor = trans_fname(fld.clazz.name, fname, True)
    buf.write("""
      {0} {1}() {{ return {2}; }}
    """.format(trans_ty(fld.typ), accessor, fname))

  # methods
  clinits, mtds = util.partition(lambda m: m.is_clinit, mtds)
  inits, mtds = util.partition(lambda m: m.is_init, mtds)
  # <init>/<clinit> should be dumped out in any case
  buf.write('\n'.join(map(to_func, clinits)))
  buf.write('\n'.join(map(to_func, inits)))
  for mtd in mtds:
    # interface won't have method bodies
    if mtd.clazz.is_itf: continue
    buf.write(to_func(mtd) + os.linesep)

  cls_sk = cname + ".sk"
  with open(os.path.join(sk_dir, cls_sk), 'w') as f:
    f.write(buf.getvalue())
    logging.info("encoding " + f.name)
    return cls_sk
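gen_cls_sk unpacks util.partition(lambda m: m.is_clinit, mtds) into (clinits, mtds): the items that satisfy the predicate, followed by the rest. The helper itself never appears in these listings; the following is only a minimal sketch of such a predicate-first variant, not the project's actual util.partition:

def partition(pred, seq):
    # Sketch (assumed semantics): split seq into (items where pred holds, remaining items).
    matching, rest = [], []
    for item in seq:
        (matching if pred(item) else rest).append(item)
    return matching, rest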
Example #2
def gen_cls_sk(sk_dir, smpls, cls):
  mtds = collect_decls(cls, "mtds")
  flds = collect_decls(cls, "flds")
  s_flds = filter(op.attrgetter("is_static"), flds)
  if cls.is_class:
    if not mtds and not s_flds: return None
  else: # cls.is_itf or cls.is_enum
    if not s_flds: return None

  cname = util.sanitize_ty(cls.name)

  buf = cStringIO.StringIO()
  buf.write("package {};\n".format(cname))
  buf.write(_const)

  # static fields
  buf.write('\n'.join(map(trans_fld, s_flds)))
  if len(s_flds) > 0: buf.write('\n')

  # migrating static fields' initialization to <clinit>
  for fld in ifilter(op.attrgetter("init"), s_flds):
    if not fld.init.has_call and not fld.init.has_str and not fld.is_aliasing: continue
    # retrieve (or declare) <clinit>
    clinit = fld.clazz.get_or_declare_clinit()
    if clinit not in mtds: mtds.append(clinit)
    # add assignment
    assign = st.gen_S_assign(exp.gen_E_id(fld.name), fld.init)
    clinit.body.append(assign)

  # accessors for static fields
  for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
    fname = fld.name
    accessor = trans_fname(fld.clazz.name, fname, True)
    buf.write("""
      {0} {1}() {{ return {2}; }}
    """.format(trans_ty(fld.typ), accessor, fname))

  # methods
  clinits, mtds = util.partition(lambda m: m.is_clinit, mtds)
  inits, mtds = util.partition(lambda m: m.is_init, mtds)
  # <init>/<clinit> should be dumped out in any case
  buf.write('\n'.join(map(partial(to_func, smpls), clinits)))
  buf.write('\n'.join(map(partial(to_func, smpls), inits)))
  for mtd in mtds:
    # interface won't have method bodies
    if mtd.clazz.is_itf: continue
    buf.write(to_func(smpls, mtd) + os.linesep)

  cls_sk = cname + ".sk"
  with open(os.path.join(sk_dir, cls_sk), 'w') as f:
    f.write(buf.getvalue())
    logging.info("encoding " + f.name)
    return cls_sk
    def _precompute_counts(self):
        """
            Precompute mappings from (L/M) to (L/M-good configs)
             for discrete L and M.

            Returns:
             A table TBL such that TBL[L][M] returns the list of configurations
              that are L-M-(M-1) acceptable.
             i.e. you get the _names_ of all configurations worse than (M-1)
              but still within M times untyped.

            Running time:
            - L passes over all configurations, so L*(2**N) where N = num modules
            Space:
            - stores at most all configurations in each row of the table,
              L * (2**N)
        """
        LM_table = []
        for L in self.Lvals:
            row = [[]] # Skip the 0 step
            unsorted_configs = self.all_configurations()
            for M in range(1, self.Mmax+1):
                good_iter, rest_iter = util.partition(unsorted_configs, self._curryLM_acceptable(L, M))
                row.append(list(good_iter))
                unsorted_configs = rest_iter
            LM_table.append(row)
        return LM_table
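_precompute_counts calls util.partition(unsorted_configs, predicate) with the iterable first and keeps both results as streams, feeding rest_iter back into the next pass. A minimal sketch of that lazy, iterable-first variant (an assumption; this project's util is not shown, and the argument order is the reverse of Example #1):

from itertools import tee

def partition(iterable, pred):
    # Sketch (assumed semantics): lazily split into (items satisfying pred, items failing pred).
    satisfied, rest = tee(iterable)
    return (x for x in satisfied if pred(x)), (x for x in rest if not pred(x))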
Example #4
 def handle_aop(self, bot, event, arg):
     r"""op all unopped member in the channel.
     Usage: /msg uniko \aop channel
            channel -- channel name (as seen from the user)
     """
     arg = irclib.irc_lower(arg.strip())
     if not self.check_channel(bot, arg):
         return False
     network = bot.network
     nickname = irclib.nm_to_n(event.source() or '')
     # TODO: asynchronous?
     for t_network in self.networks:
         if t_network == network:
             continue
         t_channel = t_network.encode(self.channels[t_network])[0]
         t_channel_obj = t_network.get_channel(t_channel)
         t_bot = t_network.get_oper(t_channel)
         if not t_bot:
             continue
         members = set(t_channel_obj.users())
         members = members.difference(t_channel_obj.opers())
         for _ in util.partition(members.__iter__(), 4): # XXX
             mode_string = b'+' + b'o' * len(_) + b' ' + b' '.join(_)
             mode_string = t_network.decode(mode_string)[0]
             t_bot.push_message(Message(
                 command='mode',
                 arguments=(self.channels[t_network], mode_string)))
         message = t_network.decode(b' '.join(members))[0]
         bot.push_message(Message(
             command='privmsg',
             arguments=(network.decode(nickname)[0], message)))
     return True
Example #5
                def iterate(cself, svm, classes):
                    cself.mention('Training SVM...')
                    D = spdiag(classes)
                    qp.update_H(D * K * D)
                    qp.update_Aeq(classes.T)
                    alphas, obj = qp.solve(cself.verbose)

                    # Construct SVM from solution
                    svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
                              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
                    svm._X = bs.instances
                    svm._y = classes
                    svm._alphas = alphas
                    svm._objective = obj
                    svm._compute_separator(K)
                    svm._K = K

                    cself.mention('Recomputing classes...')
                    p_conf = svm._predictions[-bs.L_p:]
                    pos_classes = np.vstack([_update_classes(part)
                                             for part in
                                             partition(p_conf, bs.pos_groups)])
                    new_classes = np.vstack([-np.ones((bs.L_n, 1)), pos_classes])

                    class_changes = round(np.sum(np.abs(classes - new_classes) / 2))
                    cself.mention('Class Changes: %d' % class_changes)
                    if class_changes == 0:
                        return None, svm

                    return {'svm': svm, 'classes': new_classes}, None
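Here partition(p_conf, bs.pos_groups) slices the prediction vector into consecutive per-bag groups. The sketch below assumes bs.pos_groups is a list of group sizes, which these listings do not show:

def partition(items, group_sizes):
    # Sketch (assumed semantics): cut items into consecutive slices of the given sizes.
    groups, start = [], 0
    for size in group_sizes:
        groups.append(items[start:start + size])
        start += size
    return groups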
    def seek_split_rule(self, criterion='gini'):
        if criterion == 'gini':
            metric = gini
            eval_gain = purity_gain
        elif criterion == 'entropy':
            metric = entropy
            eval_gain = info_gain
        else:
            raise ValueError('%s is not a valid partition criterion' %
                             criterion)

        best_gain = 0
        current_metric_val = metric(self.X)

        for i in self.feat_indices:
            # Extract unique values from dataset in a given feature/column.
            values = set([x[i] for x in self.X])

            for val in values:
                rule = SplitRule(self.column_names[i], i, val)

                # Partition the current dataset and check if everything landed on one side. If so,
                # this is a bad partition, don't consider it.
                true_set, false_set = partition(self.X, rule)
                if len(true_set) == 0 or len(false_set) == 0:
                    continue

                gain = eval_gain([true_set, false_set], current_metric_val,
                                 len(self.X))
                if gain >= best_gain:
                    best_gain, self.rule = gain, rule
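seek_split_rule depends on partition(self.X, rule) returning the rows that match the rule and the rows that do not. A minimal sketch under the assumption that SplitRule exposes a match(row) predicate (that class is not shown here):

def partition(rows, rule):
    # Sketch (assumed semantics): split rows into (rows matching rule, rows not matching).
    true_rows, false_rows = [], []
    for row in rows:
        (true_rows if rule.match(row) else false_rows).append(row)
    return true_rows, false_rows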
Example #7
                def iterate(cself, svm, classes):
                    cself.mention('Training SVM...')
                    D = spdiag(classes)
                    qp.update_H(D * K * D)
                    qp.update_Aeq(classes.T)
                    alphas, obj = qp.solve(cself.verbose)

                    # Construct SVM from solution
                    svm = SVM(kernel=self.kernel, gamma=self.gamma, p=self.p,
                              verbose=self.verbose, sv_cutoff=self.sv_cutoff)
                    svm._X = bs.instances
                    svm._y = classes
                    svm._alphas = alphas
                    svm._objective = obj
                    svm._compute_separator(K)
                    svm._K = K

                    cself.mention('Recomputing classes...')
                    p_conf = svm._predictions[-bs.L_p:]
                    pos_classes = np.vstack([_update_classes(part)
                                             for part in
                                             partition(p_conf, bs.pos_groups)])
                    new_classes = np.vstack([-np.ones((bs.L_n, 1)), pos_classes])

                    class_changes = round(np.sum(np.abs(classes - new_classes) / 2))
                    cself.mention('Class Changes: %d' % class_changes)
                    if class_changes == 0:
                        return None, svm

                    return {'svm': svm, 'classes': new_classes}, None
def save_all(users, binsize=1000):
    first, users = peek(iter(users))
    db = first.db
    for chunk in partition(users, binsize):
        batch = leveldb.WriteBatch()
        for user in takewhile(operator.truth, chunk):
            user.save()
        db.Write(batch, sync=True)
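save_all consumes partition(users, binsize) as fixed-size chunks, and the takewhile(operator.truth, chunk) suggests the final chunk may be padded with falsy fill values, grouper-style. A minimal sketch under that assumption:

def partition(iterable, size, fillvalue=None):
    # Sketch (assumed semantics): yield size-length chunks, padding the last one with fillvalue.
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) == size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk + [fillvalue] * (size - len(chunk))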
Example #9
def selectKth(ar,k,left,right):
    idx = selectPivotIndex(ar,left,right)
    pivoIndex = partition(ar, left, right, idx)
    if (left + k - 1) == pivoIndex:
        return pivoIndex

    if left + k - 1 < pivoIndex:
        return selectKth(ar, k, left, pivoIndex-1)
    else:
        return selectKth(ar, k - (pivoIndex - left+1), pivoIndex + 1, right)
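selectKth (and qSort and selectMedian further down) use the quicksort-style partition(ar, left, right, pivotIndex), which reorders ar[left..right] in place around the chosen pivot and returns the pivot's final index. A minimal Lomuto-style sketch, assuming inclusive bounds:

def partition(ar, left, right, pivotIndex):
    # Sketch: in-place Lomuto partition of ar[left..right] (inclusive); returns the pivot's final index.
    pivot = ar[pivotIndex]
    ar[pivotIndex], ar[right] = ar[right], ar[pivotIndex]  # park the pivot at the end
    store = left
    for i in range(left, right):
        if ar[i] < pivot:
            ar[i], ar[store] = ar[store], ar[i]
            store += 1
    ar[store], ar[right] = ar[right], ar[store]  # move the pivot into its final slot
    return store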
    def split(self):
        self.seek_split_rule()

        if self.rule is None:
            return None

        true_set, false_set = partition(self.X, self.rule)
        self.true_branch = TreeNode(self.column_names, X=true_set)
        self.false_branch = TreeNode(self.column_names, X=false_set)

        return self.true_branch, self.false_branch
Example #11
def english_probability(text):
    """
    Returns a float representing the likelihood that the given text is a
    plaintext written in English. Range: (0.0 - 1.0), higher is better.
    """
    # Ignore whitespace (revisit this later).
    text = text.upper()
    letters, other = partition(lambda c: c in ENGLISH_FREQUENCIES, text)
    if not letters: return 0.0
    # Expect roughly 15% of text to be spaces.
    spaces, other = partition(lambda c: c.isspace(), other)
    space_error = abs(float(len(spaces))/len(text) - 0.15)
    # As a rough approximation, expect 2% of characters to be punctuation.
    punc_error = abs(float(len(other))/len(text) - 0.02)
    counts = Counter(text)
    letter_error = 0.0
    for c, target_freq in ENGLISH_FREQUENCIES.items():
        letter_error += (target_freq *
                        abs(float(counts.get(c, 0))/len(letters) - target_freq))
    return max(1.0 - (punc_error + letter_error + space_error), 0.0)
Example #12
def english_probability(text):
    """
    Returns a float representing the likelihood that the given text is a
    plaintext written in English. Range: (0.0 - 1.0), higher is better.
    """
    # Ignore whitespace (revisit this later).
    text = text.upper()
    letters, other = partition(lambda c: c in ENGLISH_FREQUENCIES, text)
    if not letters: return 0.0
    # Expect roughly 15% of text to be spaces.
    spaces, other = partition(lambda c: c.isspace(), other)
    space_error = abs(float(len(spaces)) / len(text) - 0.15)
    # As a rough approximation, expect 2% of characters to be punctuation.
    punc_error = abs(float(len(other)) / len(text) - 0.02)
    counts = Counter(text)
    letter_error = 0.0
    for c, target_freq in ENGLISH_FREQUENCIES.items():
        letter_error += (
            target_freq *
            abs(float(counts.get(c, 0)) / len(letters) - target_freq))
    return max(1.0 - (punc_error + letter_error + space_error), 0.0)
Example #13
def qSort(A, left, right):
    minSize = 3
    if left < right:
        pivotIndex = selectPivotIndex(A, left, right)
        pivotIndex = partition(A, left, right, pivotIndex)
        if pivotIndex - 1 - left <= minSize:
            insertion(A, left, pivotIndex - 1)
        else:
            qSort(A, left, pivotIndex - 1)
        if right - pivotIndex - 1 <= minSize:
            insertion(A, pivotIndex + 1, right)
        else:
            qSort(A, pivotIndex + 1, right)
Example #14
def selectMedian(ar, left, right):
    k = (right - left + 1) // 2
    while k > 0:
        idx = medianOfMedias(ar, left, right, 1)
        pivotIndex = partition(ar, left, right, idx)
        p = left + k
        if p == pivotIndex:
            return pivotIndex
        elif p < pivotIndex:
            right = pivotIndex - 1
        else:
            k = k - (pivotIndex - left + 1)
            left = pivotIndex + 1
    return left
Example #15
def hw_to_center(a):
    """Convert (top left, width, height) bounding box to (center, width, height) bounding box.

    Args:
        param1 (arr): [xmin, ymin, width, height] where (xmin, ymin) 
            represents the top left of the bounding box 

    Returns:
        arr: [center_x, center_y, width, height]

    """
    bbs = util.partition(a, 4)
    bbs = [[a[0] + a[2] / 2, a[1] + a[3] / 2, a[2], a[3]] for a in bbs]

    return np.concatenate(bbs)
def main(a_few_tags=False):
    if a_few_tags:
        A_FEW_TAGS = ['contemporary gospel']#,'yellow','a blues song form','country influences',"post rock","1970s"]
        Combiner(only_these_tags=set(A_FEW_TAGS), production_run=True).fill_in_zeros()
    else:
        # We run out of memory trying to do all tags at once, so just do 2000 at a time.
        N_TAGS_PER_ROUND = 2000
        PRODUCTION_RUN = True
        combiner = Combiner(production_run=PRODUCTION_RUN)
        all_tags = list(combiner.only_these_tags)
        tag_groups = util.partition(all_tags, N_TAGS_PER_ROUND)
        overwrite_final_tab_file = True
        for group in tag_groups:
            group_combiner = Combiner(production_run=PRODUCTION_RUN, only_these_tags=set(group), overwrite_final_tab_file=overwrite_final_tab_file)
            overwrite_final_tab_file = False # From now on, we'll just append to the current one.
            group_combiner.fill_in_zeros()
Example #17
def corners_to_center(a):
    """Convert (top left, bottom right) bounding box to (center, width, height) bounding box.

    Args:
        param1 (arr): [xmin, ymin, xmax, ymax] where (xmin, ymin) 
            and (xmax, ymax) represent the top left and bottom 
            right corners of the bounding box

    Returns:
        arr: [center_x, center_y, width, height]

    """
    bbs = util.partition(a, 4)
    bbs = [[(a[0] + a[2]) / 2, (a[1] + a[3]) / 2,
            int(a[2] - a[0]),
            int(a[3] - a[1])] for a in bbs]
    return np.concatenate(bbs)
Example #18
def center_to_hw(a):
    """Convert (center, width, height) bounding box to (top left, width, height) bounding box.

    Args:
        param1 (arr): [center_x, center_y, width, height] where (center_x, center_y) 
            represents the center of the bounding box 

    Returns:
        arr: [xmin, ymin, width, height]

    """
    bbs = util.partition(a, 4)
    bbs = [[int(a[0] - a[2] / 2),
            int(a[1] - a[3] / 2),
            int(a[2]),
            int(a[3])] for a in bbs]

    return np.concatenate(bbs)
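The three bounding-box converters above all rely on util.partition(a, 4) cutting the flat coordinate array into 4-element boxes. A small usage sketch under that assumption, with hypothetical values (the flat layouts are taken from the docstrings):

import numpy as np

# two hypothetical (top-left, width, height) boxes in one flat array
boxes = np.array([10, 20, 30, 40,
                  50, 60, 70, 80])
centers = hw_to_center(boxes)       # [25, 40, 30, 40, 85, 100, 70, 80]
round_trip = center_to_hw(centers)  # back to [10, 20, 30, 40, 50, 60, 70, 80]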
Example #19
    def notify(self, time, data, isAsync = True):
        # purge expired subscribers
        self.purgeSubscribers()

        for time, bucket in self.subscribers.items():
            index = partition(bucket,
                              lambda sub: time < sub[0] or time > sub[1])

            data = [data]
            for i in range(index, len(bucket)):
                callback = bucket[i][2]
                if isAsync:
                    reactor.callLater(0, callback, data)
                else:
                    callback(data)

            del bucket[index:]

            if len(bucket) == 0:
                del self.subscribers[time]
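notify treats partition(bucket, pred) like C++ std::partition: the bucket is reordered so that entries satisfying the predicate come first, and the boundary index is returned, so the tail can be notified and then deleted. A minimal sketch under that assumption:

def partition(items, pred):
    # Sketch (assumed semantics): stable in-place partition; items satisfying pred go to the
    # front, the rest to the back, and the index of the first non-satisfying item is returned.
    front = [x for x in items if pred(x)]
    back = [x for x in items if not pred(x)]
    items[:] = front + back
    return len(front)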
    def transform_y(self, y, x):
        if not isinstance(y, md.StructuredLabel):
            return y

        for i, label in enumerate(y):
            data, data_type = label
            if (data_type == md.LabelType.BOUNDING_BOX):
                r, c, *_ = x.shape
                boxes = util.partition(
                    data, 4)  # Partition into array of bounding boxes
                masks = [center_to_mask(bb, x)
                         for bb in boxes]  # Create masks from bounding boxes
                trfms = [self.transform_x(mask)
                         for mask in masks]  # Transform masks
                y[i] = (
                    np.concatenate([mask_to_center(t)
                                    for t in trfms]), data_type
                )  # Convert masks back into bounding boxes, save result

        return y
Example #21
def gen_type_sk(sk_dir, bases):
  buf = cStringIO.StringIO()
  buf.write("package type;\n")
  buf.write(_const)

  cols, decls = util.partition(lambda c: util.is_collection(c.name), bases)
  decls = filter(lambda c: not util.is_array(c.name), decls)
  itfs, clss = util.partition(op.attrgetter("is_itf"), decls)
  logging.debug("# interface(s): {}".format(len(itfs)))
  logging.debug("# class(es): {}".format(len(clss)))
  # convert interfaces first, then usual classes
  buf.write('\n'.join(util.ffilter(map(to_struct, itfs))))
  buf.write('\n'.join(util.ffilter(map(to_struct, clss))))

  # convert collections at last
  logging.debug("# collection(s): {}".format(len(cols)))
  buf.write('\n'.join(map(col_to_struct, cols)))

  # argument number of methods
  arg_num = map(lambda mtd: len(mtd.params), methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id) {{
      return _{0}[id];
    }}
  """.format(C.typ.argNum, ", ".join(map(str, arg_num))))

  # argument types of methods
  def get_args_typ(mtd):
    def get_arg_typ(param): return str(class_lookup(param[0]).id)
    return '{' + ", ".join(map(get_arg_typ, mtd.params)) + '}'
  args_typ = map(get_args_typ, methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id, int idx) {{
      return _{0}[id][idx];
    }}
  """.format(C.typ.argType, ", ".join(args_typ)))

  # return type of methods
  def get_ret_typ(mtd):
    cls = class_lookup(mtd.typ)
    if cls: return cls.id
    else: return -1
  ret_typ = map(get_ret_typ, methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id) {{
      return _{0}[id];
    }}
  """.format(C.typ.retType, ", ".join(map(str, ret_typ))))

  # belonging class of methods
  belongs = map(lambda mtd: mtd.clazz.id, methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id) {{
      return _{0}[id];
    }}
  """.format(C.typ.belongsTo, ", ".join(map(str, belongs))))

  subcls = \
      map(lambda cls_i: '{' + ", ".join( \
          map(lambda cls_j: str(cls_i <= cls_j).lower(), classes()) \
      ) + '}', classes())
  buf.write("""
    #define _{0} {{ {1} }}
    bit {0}(int i, int j) {{
      return _{0}[i][j];
    }}
  """.format(C.typ.subcls, ", ".join(subcls)))

  ## sub type relations
  #subcls = []
  #for cls_i in classes():
  #  row = []
  #  for cls_j in classes():
  #    row.append(int(cls_i <= cls_j))
  #  subcls.append(row)

  ## sub type relations in yale format 
  #_, IA, JA = util.yale_format(subcls)
  #li, lj = len(IA), len(JA)
  #si = ", ".join(map(str, IA))
  #sj = ", ".join(map(str, JA))
  #buf.write("""
  #  #define _iA {{ {si} }}
  #  #define _jA {{ {sj} }}
  #  int iA(int i) {{
  #    return _iA[i];
  #  }}
  #  int jA(int j) {{
  #    return _jA[j];
  #  }}
  #  bit subcls(int i, int j) {{
  #    int col_i = iA(i);
  #    int col_j = iA(i+1);
  #    for (int col = col_i; col < col_j; col++) {{
  #      if (j == jA(col)) return true;
  #    }}
  #    return false;
  #  }}
  #""".format(**locals()))

  with open(os.path.join(sk_dir, "type.sk"), 'w') as f:
    f.write(buf.getvalue())
    logging.info("encoding " + f.name)
  buf.close()
Example #22
def to_struct(cls):

  # make mappings from static fields to corresponding accessors
  def gen_s_flds_accessors(cls):
    s_flds = filter(op.attrgetter("is_static"), cls.flds)
    global _s_flds
    for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
      cname = fld.clazz.name
      fid = '.'.join([cname, fld.name])
      fname = unicode(repr(fld))
      logging.debug("{} => {}".format(fid, fname))
      _s_flds[fid] = fname

  cname = util.sanitize_ty(cls.name)
  global _ty
  # if this is an interface, merge this into another family of classes
  # as long as classes that implement this interface are in the same family
  if cls.is_itf:
    # interface may have static constants
    gen_s_flds_accessors(cls)
    subss = util.flatten_classes(cls.subs, "subs")
    bases = util.rm_dup(map(lambda sub: find_base(sub), subss))
    # filter out interfaces that extend other interfaces, e.g., Action
    base_clss, _ = util.partition(op.attrgetter("is_class"), bases)
    if not base_clss:
      logging.debug("no implementer of {}".format(cname))
    elif len(base_clss) > 1:
      logging.debug("ambiguous inheritance of {}: {}".format(cname, base_clss))
    else: # len(base_clss) == 1
      base = base_clss[0]
      base_name = base.name
      logging.debug("{} => {}".format(cname, base_name))
      _ty[cname] = base_name
      if cls.is_inner: # to handle inner interface w/ outer class name
        logging.debug("{} => {}".format(repr(cls), base_name))
        _ty[unicode(repr(cls))] = base_name

    return ''

  # if this is the base class having subclasses,
  # make a virtual struct first
  if cls.subs:
    cls = to_v_struct(cls)
    cname = cls.name

  # cls can be modified above, thus generate static fields accessors here
  gen_s_flds_accessors(cls)

  # for unique class numbering, add an identity mapping
  if cname not in _ty: _ty[cname] = cname

  buf = cStringIO.StringIO()
  buf.write("struct " + cname + " {\n  int hash;\n")

  # to avoid static fields, which will be bound to a class-representing package
  _, i_flds = util.partition(op.attrgetter("is_static"), cls.flds)
  buf.write('\n'.join(map(trans_fld, i_flds)))
  if len(i_flds) > 0: buf.write('\n')
  buf.write("}\n")

  return buf.getvalue()
Example #23
def to_sk(pgr, sk_dir):
  # clean up result directory
  if os.path.isdir(sk_dir): util.clean_dir(sk_dir)
  else: os.makedirs(sk_dir)

  # reset global variables so that we can run this encoding phase per demo
  reset()

  # update global constants
  # TODO: conservative analysis of possible length of collections
  # TODO: counting .add() calls or something?
  magic_S = 7

  global _const
  _const = u"""
    int S = {}; // length of arrays for Java collections
  """.format(magic_S)

  # type.sk
  logging.info("building class hierarchy")
  pgr.consist()
  # merge all classes and interfaces, except for primitive types
  clss, _ = util.partition(lambda c: util.is_class_name(c.name), classes())
  bases = rm_subs(clss)
  gen_type_sk(sk_dir, bases)

  # cls.sk
  cls_sks = []
  for cls in pgr.classes:
    # skip the collections, which will be encoded at type.sk
    if repr(cls).split('_')[0] in C.collections: continue
    cls_sk = gen_cls_sk(sk_dir, cls)
    if cls_sk: cls_sks.append(cls_sk)

  # log.sk
  gen_log_sk(sk_dir, pgr)

  # main.sk that imports all the other sketch files
  buf = cStringIO.StringIO()

  # --bnd-cbits: the number of bits for integer holes
  bits = max(5, int(math.ceil(math.log(len(methods()), 2))))
  buf.write("pragma options \"--bnd-cbits {}\";\n".format(bits))

  # --bnd-unroll-amnt: the unroll amount for loops
  unroll_amnt = None # use a default value if not set
  unroll_amnt = magic_S # TODO: other criteria?
  if unroll_amnt:
    buf.write("pragma options \"--bnd-unroll-amnt {}\";\n".format(unroll_amnt))

  # --bnd-inline-amnt: bounds inlining to n levels of recursion
  inline_amnt = None # use a default value if not set
  # setting it 1 means there is no recursion
  if inline_amnt:
    buf.write("pragma options \"--bnd-inline-amnt {}\";\n".format(inline_amnt))
    buf.write("pragma options \"--bnd-bound-mode CALLSITE\";\n")

  sks = ["log.sk", "type.sk"] + cls_sks
  for sk in sks:
    buf.write("include \"{}\";\n".format(sk))

  # TODO: make harness (if not exists)

  with open(os.path.join(sk_dir, "main.sk"), 'w') as f:
    f.write(buf.getvalue())
    logging.info("encoding " + f.name)
  buf.close()
Example #24
 def toggle(onlights, pos1, pos2):
     turned_off_lights, turned_on_lights = util.partition(
             lambda light: light in onlights,
             create_lights(pos1, pos2)
     )
     return onlights.difference(turned_on_lights).union(turned_off_lights)
Example #25
 def wrapper(r):
   args = map(util.binstr2int, util.partition(r, self.args_bit_size))
   log(" > %s@%d | %s" % (name, regs.PC, ' '.join(map(str, args))))
   return fn(*args)
Example #26
# paths to folders containing covid-positive and covid-negative patients
POSITIVE_CLASS_PATH = r'covid-positive'
NEGATIVE_CLASS_PATH = r'covid-negative'

start_time = time.time()

op = webdriver.ChromeOptions()
op.add_argument('headless') 
drivers = [webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)]
# drivers = [webdriver.Chrome(ChromeDriverManager().install(), opt) for i in range(NUM_PEERS)]

positive_files = get_files(POSITIVE_CLASS_PATH, NUM_IMAGES, '.png')
negative_files = get_files(NEGATIVE_CLASS_PATH, NUM_IMAGES, '.png')

if DATA_SPLIT == 'partition':
    pos_partitions = partition(positive_files, NUM_PEERS)
    neg_partitions = partition(negative_files, NUM_PEERS)
elif DATA_SPLIT == 'rpartition':
    pos_partitions = r_partition(positive_files, NUM_PEERS)
    neg_partitions = r_partition(negative_files, NUM_PEERS)
elif DATA_SPLIT == 'spartition':
    pos_partitions = s_partition(positive_files, RATIOS)
    neg_partitions = s_partition(negative_files, RATIOS)

for index, driver in enumerate(drivers):
    # Click 'Start Building' on home page
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)

    # Upload files on Task Training
    time.sleep(6)
    if DATA_SPLIT == 'iid':
Example #27
# Defines the way to split the data: 'iid' for iid data, 'partition' for even-size partitions, 'rpartition' for random-size partitions,
# 'spartition' for partitions whose sizes are given by the RATIOS argument
DATA_SPLIT = 'rpartition'
RATIOS = [0.5, 0.3, 0.2]

start_time = time.time()

drivers = [
    webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)
]

data = read_csv(CSV_FILE_PATH)
header = data[0]

if DATA_SPLIT == 'partition':
    res = partition(data, NUM_PEERS)
    for index, r in enumerate(res):
        create_csv(header, r, f"{index}_partition.csv")
elif DATA_SPLIT == 'rpartition':
    res = r_partition(data, NUM_PEERS)
    for index, r in enumerate(res):
        create_csv(header, r, f"{index}_partition.csv")
elif DATA_SPLIT == 'spartition':
    res = s_partition(data, RATIOS)
    for index, r in enumerate(res):
        create_csv(header, r, f"{index}_partition.csv")

for index, driver in enumerate(drivers):
    # Click 'Start Building' on home page
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)
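The comment at the top of this example names three split modes, but the helpers themselves never appear in these listings. Rough sketches of what partition, r_partition, and s_partition might do, under those stated assumptions only:

import random

def partition(items, n):
    # Sketch: split items into n parts of (nearly) even size; the last part takes the remainder.
    size = len(items) // n
    return [items[i * size:(i + 1) * size] for i in range(n - 1)] + [items[(n - 1) * size:]]

def r_partition(items, n):
    # Sketch: split items into n parts at randomly chosen cut points.
    cuts = sorted(random.sample(range(1, len(items)), n - 1))
    return [items[a:b] for a, b in zip([0] + cuts, cuts + [len(items)])]

def s_partition(items, ratios):
    # Sketch: split items into len(ratios) parts whose sizes follow the given ratios.
    parts, start = [], 0
    for i, ratio in enumerate(ratios):
        end = len(items) if i == len(ratios) - 1 else start + int(ratio * len(items))
        parts.append(items[start:end])
        start = end
    return parts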
Example #28
 def IOs(self):
   ios, _ = util.partition(lambda s: isinstance(s, CallBase), self._logs)
   return ios
Example #29
# paths to the folder containing the CIFAR10 images and to the CSV file of labels
IMAGE_FILE_PATH = r'CIFAR10'
LABEL_FILE_PATH = 'labels.csv'
NUM_IMAGES = 10


# Download and extract chromedriver from here: https://sites.google.com/a/chromium.org/chromedriver/downloads
op = webdriver.ChromeOptions()
op.add_argument('headless') 
# You can add options=op for chrome headless mode
# drivers = [webdriver.Chrome(ChromeDriverManager().install(), options=op) for i in range(NUM_PEERS)]
drivers = [webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)]
start_time = time.time()
 
if DATA_SPLIT == 'partition':
    partitions = partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), NUM_PEERS)
elif DATA_SPLIT == 'spartition':
    partitions = s_partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), RATIOS)  
elif DATA_SPLIT == 'rpartition':
    partitions = r_partition(get_files(IMAGE_FILE_PATH, NUM_IMAGES, '.png'), NUM_PEERS)


for index, driver in enumerate(drivers):


    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)
    # Upload files on Task Training
    time.sleep(6)
    if DATA_SPLIT != 'iid':
        driver.find_element_by_id('hidden-input_cifar10_Images').send_keys(' \n '.join(partitions[index]))
        driver.find_element_by_id('hidden-input_cifar10_Labels').send_keys(os.path.abspath(LABEL_FILE_PATH))
Example #30
def to_struct(cls):

  # make mappings from static fields to corresponding accessors
  def gen_s_flds_accessors(cls):
    s_flds = filter(op.attrgetter("is_static"), cls.flds)
    global _s_flds
    for fld in ifilterfalse(op.attrgetter("is_private"), s_flds):
      cname = fld.clazz.name
      fid = '.'.join([cname, fld.name])
      fname = unicode(repr(fld))
      logging.debug("{} => {}".format(fid, fname))
      _s_flds[fid] = fname

  cname = util.sanitize_ty(cls.name)
  global _ty
  # if this is an interface, merge this into another family of classes
  # as long as classes that implement this interface are in the same family
  if cls.is_itf:
    # interface may have static constants
    gen_s_flds_accessors(cls)
    subss = util.flatten_classes(cls.subs, "subs")
    bases = util.rm_dup(map(lambda sub: find_base(sub), subss))
    # filter out interfaces that extend other interfaces, e.g., Action
    base_clss, _ = util.partition(op.attrgetter("is_class"), bases)
    if not base_clss:
      logging.debug("no implementer of {}".format(cname))
    elif len(base_clss) > 1:
      logging.debug("ambiguous inheritance of {}: {}".format(cname, base_clss))
    else: # len(base_clss) == 1
      base = base_clss[0]
      base_name = base.name
      logging.debug("{} => {}".format(cname, base_name))
      _ty[cname] = base_name
      if cls.is_inner: # to handle inner interface w/ outer class name
        logging.debug("{} => {}".format(repr(cls), base_name))
        _ty[unicode(repr(cls))] = base_name

    return ''

  # if this is the base class having subclasses,
  # make a virtual struct first
  if cls.subs and not cls.is_aux:
    cls = to_v_struct(cls)
    cname = cls.name

  # cls can be modified above, thus generate static fields accessors here
  gen_s_flds_accessors(cls)

  # for unique class numbering, add an identity mapping
  if cname not in _ty: _ty[cname] = cname

  buf = cStringIO.StringIO()
  buf.write("struct " + cname + " {\n  int hash;\n")

  # to avoid static fields, which will be bound to a class-representing package
  _, i_flds = util.partition(op.attrgetter("is_static"), cls.flds)
  buf.write('\n'.join(map(trans_fld, i_flds)))
  if len(i_flds) > 0: buf.write('\n')
  buf.write("}\n")

  return buf.getvalue()
Example #31
	def sample_sort(self,data,n_samples,comm):
		# n_samples is the number of samples that will be taken
		# n_samples > comm.Get_size()
		# we will leave m as comm.Get_size() - 1
		rank = comm.Get_rank()
		# n is the number of processes
		n = comm.Get_size()

		#### 1. Choose a random sample from the data array
		
		if (rank == 0):
			indices = [i for i in range(len(data))]
			random_indices = []
			for i in range(n_samples):
				r = random.randrange(len(indices))
				random_indices.append(indices.pop(r))
			sample = []
			for index in random_indices:
				sample.append(data[index])
		else:
			sample = None
		sample = comm.bcast(sample, root = 0)

		#### 2. Distributively sort the sample using merge sort
		
		sorted_sample = self.bucket_sort(sample, comm)
		

		#### 3. Choose n -1 bucket separators
		
		if (rank == 0):
			for i in range(n_samples - n + 1):
				size = len(sorted_sample)
				del sorted_sample[random.randrange(size)]
			print ('separators ' + str(sorted_sample))
		sorted_sample = comm.bcast(sorted_sample, root = 0)

		#### 4. Distribute data in buckets
		
		if (rank == 0):
			data_partition = util.partition(data, n)
		else:
			data_partition = None
		data_part = comm.scatter(data_partition, root = 0)
		print ('data parts ' + str(data_part))
		
		###### 4.1 each bucket assigns the corresponding bucket of a group of
		######     elements
		
		assignation = []
		## TODO: REMOVE ELEMENTS FROM DATA_PART
		for element in data_part:
			if (element > sorted_sample[-1]):
				assignation.append((element, n - 1))
			else:
				for i in range((len(sorted_sample))):
					separator = sorted_sample[i]
					if (element <= separator):
						assignation.append((element, i))
						break
		print ('assignation rank:' + str(rank) + ' = ' + str(assignation))
		
		###### 4.2 First node collects the assignations and sends elements to
		######	   their corresponding bucket
		
		assignations = comm.gather(assignation, root = 0)
		if (rank == 0):
			assignations = np.concatenate(assignations).tolist()
			temp = [[] for i in range(n)]
			for element, bucket_id in assignations:
				temp[bucket_id].append(element)
			for i in range(n - 1):
				comm.send(temp[i + 1], dest = (i + 1), tag = 2)
			data = temp[0]

		else:
			data = []
			data = comm.recv(source = 0, tag = 2)

		print ('bucket ' + str(rank) + ' -> ' + str(data))

		#### 5. Sort in each bucket and collect it

		data.sort()
		data = comm.gather(data, root = 0)
		if (rank == 0):
			print ('sorted data: ' + str(np.concatenate(data)))
			return np.concatenate(data).tolist()
Example #32
start_time = time.time()

op = webdriver.ChromeOptions()
op.add_argument('headless')
drivers = [
    webdriver.Chrome(ChromeDriverManager().install()) for i in range(NUM_PEERS)
]

digit_files = [
    get_files(DIGIT_CLASS_PATHS[i], NUM_IMAGES, '.jpg')
    for i in range(len(DIGIT_CLASS_PATHS))
]

if DATA_SPLIT == 'partition':
    digit_partitions = [
        partition(digit_files[i], NUM_PEERS) for i in range(len(digit_files))
    ]
elif DATA_SPLIT == 'rpartition':
    digit_partitions = [
        r_partition(digit_files[i], NUM_PEERS) for i in range(len(digit_files))
    ]
elif DATA_SPLIT == 'spartition':
    digit_partitions = [
        s_partition(digit_files[i], RATIOS) for i in range(len(digit_files))
    ]

for index, driver in enumerate(drivers):
    find_task_page(driver, PLATFORM, TASK_NAME, TRAINING_MODE)
    time.sleep(8)
    if DATA_SPLIT == 'iid':
        for i in range(len(DIGIT_CLASS_PATHS)):
Example #33
 def IOs(self):
     ios, _ = util.partition(lambda s: isinstance(s, CallBase), self._logs)
     return ios
savepath = '../parses/eps-40k-ml10-3trans/'
predictpath =  'prediction/eps-40k-ml10-3trans/second/minibatch=10/'

parses = [load_parses_separate(savepath, k) for k in range(28000)]

# Optional: training on parses with non-empty ref-forests.
cleaned_parses = [(target_forest, ref_forest, src_fsa, tgt_sent) for (target_forest, ref_forest, src_fsa, tgt_sent) in parses if ref_forest]
print(len(cleaned_parses))

lexicon = load_lexicon(savepath)
fset = load_featureset(savepath)

# initialize weights uniformly
w_init = defaultdict(float)
for feature in fset:
    w_init[feature] = 1e-2

k = 10
minibatches = partition(cleaned_parses, k)
w_trained, delta_ws, likelihoods = sgd_minibatches(iters=5, delta_0=10, w=w_init, minibatches=minibatches, batch_size=k, parses=cleaned_parses, 
									  			   shuffle=True, sparse=True, scale_weight=2, regularizer=1, lmbda=1,
									  			   bar=True, log=False, log_last=False, check_convergence=False, 
									  			   savepath=False, prediction=predictpath, prediction_length=20)

print(likelihoods)

# printing for verification
w = w_trained[-1]
for k, v in sorted(w.items(), key=lambda x: x[1], reverse=True):
	print('{}'.format(k).ljust(25) + '{}'.format(v))
Example #35
 def evts(self):
   _, evts = util.partition(lambda s: isinstance(s, CallBase), self._logs)
   return evts
for x in input[1 : ]:
    if characters[-1] == '\\':
        characters[-1] = characters[-1] + x
    elif characters[-1] in ["\\'", '\\`']:
        characters[-1] = characters[-1] + x
    elif characters[-1].startswith(("\\'", '\\`')):
        if len(characters[-1]) == 2:
            characters[-1] = characters[-1] + x
        else:
            characters.append(x)
    elif characters[-1].startswith('\\'):
        if characters[-1].endswith(' '):
            characters.append(x)
        else:
            characters[-1] = characters[-1] + x
    elif x == 'v':
        characters[-1] = characters[-1] + x
    else:
        characters.append(x)

parts = partition(characters, [7, 33], cyclic = True, action = 'new')
result = []
for i, part in enumerate(parts):
    glued = ''.join(part)
    if i % 2 == 0:
        result.append(r'\textcolor{red}{%s}' % glued)
    else:
        result.append(r'\textcolor{white}{%s}' % glued)

file('1.txt', 'w').write(''.join(result))
Example #37
def sgd_minibatches(iters, delta_0, w, minibatches=[], parses=[], batch_size=20,
                    sparse=False, log=False, bar=True, 
                    prob_log=False, log_last=False,
                    check_convergence=False,
                    scale_weight=False,
                    regularizer=False,
                    lmbda=2.0,
                    savepath=False,
                    prediction=False,
                    shuffle=False,
                    prediction_length=10):
    """
    Performs stochastic gradient descent on the weights vector w on
    minibatches = [minibatch_1, minibatch_2,....,minibatch_N].

    We decay the learning rate after each minibatch, following the rule from
    http://cilvr.cs.nyu.edu/diglib/lsml/bottou-sgd-tricks-2012.pdf section 5.2:

    delta_k = delta_0 * (1 + delta_0*lmbda*k)**(-1)

    where k is the index of the minibatch and delta_0 is the initial learning rate,
    and lmbda is another hyperparameter that controls the rate of decay.
    """ 
    likelihoods = list()
    avg_likelihoods = list()
    ws = []
    delta_ws = []
    for i in range(iters):
        
        print('Iteration {0}/{1}'.format(i+1, iters))

        learning_rates = list()
        if bar and not (i==iters-1 and log_last): bar = progressbar.ProgressBar(max_value=len(minibatches))
            
        if shuffle:
            minibatches = partition(random.sample(parses, len(parses)), batch_size)

        for k, minibatch in enumerate(minibatches):
            delta_w = 0.0
            w_new = defaultdict(float)
            
            delta_k = delta_0 * (1 + delta_0*(lmbda*(i*len(minibatches)+k)))**(-1) # this is delta_k = delta_0 when k=0 and i=0
            
            learning_rates.append(delta_k)

            if bar and not (i==iters-1 and log_last): bar.update(k)

            for m, parse in enumerate(minibatch):
                # unpack parse

                target_forest, ref_forest, src_fsa, tgt_sent = parse
                
                ### D_n(x) ###

                tgt_edge2fmap, _ = featurize_edges(target_forest, src_fsa, tgt_sent=tgt_sent,
                                                   sparse_del=sparse, sparse_ins=sparse, sparse_trans=sparse)

                # recompute edge weights
                tgt_edge_weights = {edge: np.exp(weight_function(edge, tgt_edge2fmap[edge], w)) for edge in target_forest}
                # compute inside and outside
                tgt_tsort = top_sort(target_forest)
                root_tgt = Nonterminal("D_n(x)")
                I_tgt = inside_algorithm(target_forest, tgt_tsort, tgt_edge_weights)
                O_tgt = outside_algorithm(target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt)
                # compute expected features
                expected_features_Dn_x = expected_feature_vector(target_forest, I_tgt, O_tgt, tgt_edge2fmap)

                ### D(x,y) ###

                ref_edge2fmap, _ = featurize_edges(ref_forest, src_fsa, tgt_sent=tgt_sent,
                                                   sparse_del=sparse, sparse_ins=sparse, sparse_trans=sparse)
                # recompute edge weights
                ref_edge_weights = {edge: np.exp(weight_function(edge, ref_edge2fmap[edge], w)) for edge in ref_forest}

                # compute inside and outside
                tsort = top_sort(ref_forest)
                root_ref = Nonterminal("D(x,y)")
                I_ref = inside_algorithm(ref_forest, tsort, ref_edge_weights)
                O_ref = outside_algorithm(ref_forest, tsort, ref_edge_weights, I_ref, root_ref)
                # compute expected features
                expected_features_D_xy = expected_feature_vector(ref_forest, I_ref, O_ref, ref_edge2fmap)
                # update w
                w_step, d_w = update_w(w, expected_features_D_xy, expected_features_Dn_x, delta=delta_k, regularizer=regularizer)
                
                # store likelihoods
                if I_ref and I_tgt: # for the case of an empty forest! since log(0) = -inf
                    # compute the likelihood of the target sentence
                    l = np.log(I_ref[root_ref]) - np.log(I_tgt[root_tgt])
                    if np.isfinite(l):
                        likelihoods.append(l)
                    else:
                        likelihoods.append(likelihoods[-1])
                else:
                    likelihoods.append(likelihoods[-1])
                avg_likelihood = sum(likelihoods) / len(likelihoods)
                avg_likelihoods.append(avg_likelihood)

                # the update is averaged over the minibatch
                delta_w += d_w / len(minibatch)
                for feature, value in w_step.items():
                    w_new[feature] += value / len(minibatch)

                if log or (i==iters-1 and log_last):
                    print("x = '{}'".format(src_fsa.sent))
                    print("y = '{}'".format(tgt_sent))
                    
                    print('Viterbi')
                    d = viterbi(target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt) # use exp!
                    candidates = write_derrivation(d)
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}'.format(joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))
                    
                    n = 100
                    d, count = ancestral_sample(n, target_forest, tgt_tsort, tgt_edge_weights, I_tgt, root_tgt) # use exp!
                    candidates = write_derrivation(d)
                    print('Most sampled: {0}/{1}'.format(count, n))
                    print("Best y = '{}'".format(candidates.pop()))
                    print('P(y,d|x) = {}\n'.format(joint_prob(d, tgt_edge_weights, I_tgt, root_tgt, log=prob_log)))

            if bar and not (i==iters-1 and log_last): bar.update(k+1)

            # hack: scale weights so that they are at most of the scale 10**scale_weight
            if scale_weight:
                abs_max = max(map(abs, w_new.values()))
                if np.isfinite(abs_max):
                    for k, v in w_new.items():
                        w_new[k] = v / 10**(int(np.log10(abs_max))+1 - scale_weight)
                    # update
                    w = w_new
                else:
                    # return to previous weight
                    print('inf or nan')
                    w = ws[-2]
                    print(tgt_sent)


            # update after each minibatch
            # w = w_new        
            ws.append(w)
            delta_ws.append(delta_w)

        if bar and not (i==iters-1 and log_last): bar.finish()

        if savepath:
            save_weights(w, savepath + 'trained-{}-'.format(i+1))

        if check_convergence:
            print('delta w: {}\n'.format([ds / len(w.keys()) for ds in delta_ws]))
            print('Learning rates: {}'.format(learning_rates))

        # if prediction and i%5==0: # save every 5 iterations
        predict(parses[0:prediction_length], w, i+1, prediction)

    return ws, delta_ws, avg_likelihoods
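The decay rule from the docstring is easy to sanity-check in isolation. With the delta_0=10 and lmbda=1 used by the earlier training script, the first few per-minibatch rates come out as follows (note the loop above actually advances k across iterations via i*len(minibatches)+k):

delta_0, lmbda = 10.0, 1.0
for k in range(4):
    delta_k = delta_0 * (1 + delta_0 * lmbda * k) ** (-1)
    print(k, delta_k)  # 0 -> 10.0, 1 -> ~0.909, 2 -> ~0.476, 3 -> ~0.323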
Example #38
def scan_and_update_history(fs, fs_root, root_mark, path_filter, hash_type,
                            history_store, peerid, groupids, clock, slog):
    with slog.time("read history") as rt:
        history_entries = history_store.read_entries(peerid)
        rt.set_result({"history entries": len(history_entries)})

    with slog.time("scan files") as rt:
        file_stats = list(fs.list_stats(
            fs_root, root_mark, names_to_ignore = path_filter.names_to_ignore))
        rt.set_result({"file stats": len(file_stats)})

    with slog.time("diff file stats") as rt:
        fdiffs = diff_file_stats(file_stats, history_entries, groupids, slog)
        ignored_fdiffs, fdiffs = partition(fdiffs,
            lambda fdiff: path_filter.ignore_path(fdiff.rpath.full))
        slog.ignored_rpaths(fdiff.rpath for fdiff in ignored_fdiffs)
        rt.set_result({"file diffs": len(fdiffs)})

    with slog.time("hash files") as rt:
        hashed_fdiffs = list(hash_file_diffs(fs, fdiffs, hash_type, slog))
        rt.set_result({"hashed file diffs": len(hashed_fdiffs)})

    # We rescan the files to make sure they are stable.  We might
    # decide to do this before hashing if there are lots of big
    # unstable files.  But I think we'll usually be stable.
    with slog.time("rescan files") as rt:
        rescan_stats = list(fs.stats(
            (fdiff.rpath for fdiff in hashed_fdiffs)))
        rt.set_result({"rescanned file stats": len(rescan_stats)})

    with slog.time("check change stability") as rt:
        rescan_stats_by_rpath = dict((rpath, (size, mtime))
                                     for rpath, size, mtime in rescan_stats)

        def is_stable(fdiff):
            (rescan_size, rescan_mtime) = rescan_stats_by_rpath.get(
                fdiff.rpath, (DELETED_SIZE, DELETED_MTIME))
            return fdiff.size == rescan_size and \
                   mtimes_eq(fdiff.mtime, rescan_mtime)

        stable_fdiffs, unstable_fdiffs = partition(hashed_fdiffs, is_stable)
        rt.set_result({"stable file diffs": len(stable_fdiffs),
                       "unstable file diffs": len(unstable_fdiffs)})

    with slog.time("insert new history entries"):
        new_entries = list(new_history_entries_from_file_diffs(
            stable_fdiffs, peerid, clock))
        if new_entries:
            history_store.add_entries(new_entries)

    # Technically, we don't have to do this, but it's nice to log this
    # after every scan.
    with slog.time("reread history") as rt:
        history_entries = history_store.read_entries(peerid)
        history_by_gpath = group_history_by_gpath(history_entries)
        total_size = sum(history.latest.size for history in
                         history_by_gpath.itervalues())
        rt.set_result({"path count": len(history_by_gpath),
                       "total size": total_size})

    return history_entries
Example #39
def to_sk(cmd, smpls, tmpl, sk_dir):
  # clean up result directory
  if os.path.isdir(sk_dir): util.clean_dir(sk_dir)
  else: os.makedirs(sk_dir)

  # reset global variables so that we can run this encoding phase per demo
  reset()

  # update global constants
  def logged(mtd):
    if mtd.is_init: return False
    clss = util.flatten_classes([mtd.clazz], "subs")
    return sample.mtd_appears(smpls, clss, mtd.name)
  mtds = filter(logged, methods())
  if mtds:
    n_params = 2 + max(map(len, map(op.attrgetter("params"), mtds)))
  else: # no meaningful logs in the sample?
    n_params = 2

  n_evts = sample.max_evts(smpls)
  if cmd == "android":
    n_views = sample.max_views(smpls)
    magic_S = max(3, n_evts + 1, n_views)
  else:
    magic_S = max(5, n_evts + 1) # at least 5, just in case

  n_ios = sample.max_IOs(smpls)

  global _const
  _const = u"""
    int P = {}; // length of parameters (0: (>|<)mid, 1: receiver, 2...)
    int S = {}; // length of arrays for Java collections
    int N = {}; // length of logs
  """.format(n_params, magic_S, n_ios)

  # type.sk
  logging.info("building class hierarchy")
  tmpl.consist()
  # merge all classes and interfaces, except for primitive types
  clss, _ = util.partition(lambda c: util.is_class_name(c.name), classes())
  bases = rm_subs(clss)
  gen_type_sk(sk_dir, bases)

  # cls.sk
  cls_sks = []
  for cls in tmpl.classes:
    # skip the collections, which will be encoded at type.sk
    if repr(cls).split('_')[0] in C.collections: continue
    cls_sk = gen_cls_sk(sk_dir, smpls, cls)
    if cls_sk: cls_sks.append(cls_sk)

  # sample_x.sk
  smpl_sks = []
  for smpl in smpls:
    smpl_sk = "sample_" + smpl.name + ".sk"
    smpl_sks.append(smpl_sk)
    sk_path = os.path.join(sk_dir, smpl_sk)
    gen_smpl_sk(sk_path, smpl, tmpl, tmpl.harness(smpl.name))

  # log.sk
  gen_log_sk(sk_dir, tmpl)

  # sample.sk that imports all the other sketch files
  buf = cStringIO.StringIO()

  # deprecated as we use regex generator for class/method roles
  ## --bnd-cbits: the number of bits for integer holes
  #bits = max(5, int(math.ceil(math.log(len(methods()), 2))))
  #buf.write("pragma options \"--bnd-cbits {}\";\n".format(bits))

  # --bnd-unroll-amnt: the unroll amount for loops
  unroll_amnt = max(n_params, magic_S)
  buf.write("pragma options \"--bnd-unroll-amnt {}\";\n".format(unroll_amnt))

  # --bnd-inline-amnt: bounds inlining to n levels of recursion
  inline_amnt = None # use a default value if not set
  if cmd == "android":
    #inline_amnt = 2 # depth of View hierarchy (at findViewByTraversal)
    inline_amnt = 1 # no recursion for flat Views
  elif cmd == "gui":
    # setting it 1 means there is no recursion
    inline_amnt = 1
  if inline_amnt:
    buf.write("pragma options \"--bnd-inline-amnt {}\";\n".format(inline_amnt))
    buf.write("pragma options \"--bnd-bound-mode CALLSITE\";\n")

  sks = ["log.sk", "type.sk"] + cls_sks + smpl_sks
  for sk in sks:
    buf.write("include \"{}\";\n".format(sk))
  with open(os.path.join(sk_dir, "sample.sk"), 'w') as f:
    f.write(buf.getvalue())
    logging.info("encoding " + f.name)
  buf.close()
Example #40
 def evts(self):
     _, evts = util.partition(lambda s: isinstance(s, CallBase), self._logs)
     return evts
Example #41
def gen_type_sk(sk_dir, bases):
  buf = cStringIO.StringIO()
  buf.write("package type;\n")
  buf.write(_const)

  buf.write(trans_lib())
  buf.write('\n')

  cols, decls = util.partition(lambda c: util.is_collection(c.name), bases)
  decls = filter(lambda c: not util.is_array(c.name), decls)
  itfs, clss = util.partition(op.attrgetter("is_itf"), decls)
  logging.debug("# interface(s): {}".format(len(itfs)))
  logging.debug("# class(es): {}".format(len(clss)))
  # convert interfaces first, then usual classes
  buf.write('\n'.join(util.ffilter(map(to_struct, itfs))))
  buf.write('\n'.join(util.ffilter(map(to_struct, clss))))

  # convert collections at last
  logging.debug("# collection(s): {}".format(len(cols)))
  buf.write('\n'.join(map(col_to_struct, cols)))

  # argument number of methods
  arg_num = map(lambda mtd: len(mtd.params), methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id) {{
      return _{0}[id];
    }}
  """.format(C.typ.argNum, ", ".join(map(str, arg_num))))

  # argument types of methods
  def get_args_typ(mtd):
    def get_arg_typ(param): return str(class_lookup(param[0]).id)
    return '{' + ", ".join(map(get_arg_typ, mtd.params)) + '}'
  args_typ = map(get_args_typ, methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id, int idx) {{
      return _{0}[id][idx];
    }}
  """.format(C.typ.argType, ", ".join(args_typ)))

  # return type of methods
  def get_ret_typ(mtd):
    cls = class_lookup(mtd.typ)
    if cls: return cls.id
    else: return -1
  ret_typ = map(get_ret_typ, methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id) {{
      return _{0}[id];
    }}
  """.format(C.typ.retType, ", ".join(map(str, ret_typ))))

  # belonging class of methods
  belongs = map(lambda mtd: mtd.clazz.id, methods())
  buf.write("""
    #define _{0} {{ {1} }}
    int {0}(int id) {{
      return _{0}[id];
    }}
  """.format(C.typ.belongsTo, ", ".join(map(str, belongs))))

  subcls = \
      map(lambda cls_i: '{' + ", ".join( \
          map(lambda cls_j: str(cls_i <= cls_j).lower(), classes()) \
      ) + '}', classes())
  buf.write("""
    #define _{0} {{ {1} }}
    bit {0}(int i, int j) {{
      return _{0}[i][j];
    }}
  """.format(C.typ.subcls, ", ".join(subcls)))

  ## sub type relations
  #subcls = []
  #for cls_i in classes():
  #  row = []
  #  for cls_j in classes():
  #    row.append(int(cls_i <= cls_j))
  #  subcls.append(row)

  ## sub type relations in yale format 
  #_, IA, JA = util.yale_format(subcls)
  #li, lj = len(IA), len(JA)
  #si = ", ".join(map(str, IA))
  #sj = ", ".join(map(str, JA))
  #buf.write("""
  #  #define _iA {{ {si} }}
  #  #define _jA {{ {sj} }}
  #  int iA(int i) {{
  #    return _iA[i];
  #  }}
  #  int jA(int j) {{
  #    return _jA[j];
  #  }}
  #  bit subcls(int i, int j) {{
  #    int col_i = iA(i);
  #    int col_j = iA(i+1);
  #    for (int col = col_i; col < col_j; col++) {{
  #      if (j == jA(col)) return true;
  #    }}
  #    return false;
  #  }}
  #""".format(**locals()))

  with open(os.path.join(sk_dir, "type.sk"), 'w') as f:
    f.write(buf.getvalue())
    logging.info("encoding " + f.name)
  buf.close()
	print("Calculate average time resolutions...")
	avg_time_res = map(avg, min_max_t)
	total_avg_time_res = sum([t for userid, t in avg_time_res])/len(avg_time_res)

	print("avg time between positions: " + str(total_avg_time_res))
	fig = plt.figure()
	plt.hist([v[1] for v in avg_time_res], bins = 50)
	plt.close()

	print("Reading file and counting cell occurencies...")
	cell_occ = util.MapReduce(user_positions, addByKey, initializer = init)
	occ = cell_occ(config.USERS)

	print("Creating cell occurrency graphs...")
	for userid, celldata in util.partition(map(group_user, occ)):
		cells = sorted(set([(p[0], p[1]) for p in celldata]))
		counts = [0] * len(cells)
		for lat, lon, count in celldata:
			counts[cells.index((lat,lon))] += count
		fig = plt.figure()
		plt.ylabel("# occurrences")
		plt.bar([c[0] for c in enumerate(counts)], counts, color = (0.0,0.0,0.8,1.0),  linewidth= 0.0)
		plt.savefig("figs/counts/" + userid + ".png")
		plt.close()

	print("Creating timeline graphs...")
	init()
	for i, traj in enumerate(map(user_trajectory_positions, config.USERS)):
		userid = traj[0][0]
		posdata = [(p[1][0], p[1][1], p[1][2]) for p in traj]
Example #43
	def bucket_sort(self, data, comm):
		# data is the subarray assigned to this process
		# comm allows communication with the other processes
		rank = comm.Get_rank()
		# n is the number of processes
		n = comm.Get_size()

		#### 1. Find min and max and bucket lenght

		if (rank == 0):
			# find the min (a) and the max (b) number in the array
			max_num = data[0]
			min_num = data[0]
			for i in range(len(data)):
				if (data[i] < min_num):
					min_num = data[i]
				elif (data[i] > max_num):
					max_num = data[i]
			# broadcast the bucket division found
			bucket_len = (max_num - min_num) // n
			# the lower limit for each bucket i is a_i = a + i * bucket_length
			buckets_info = (min_num, bucket_len)
			data = util.partition(data, n)
		else:
			buckets_info = None
		buckets_info = comm.bcast(buckets_info, root = 0)


		#### 2. Each bucket finds in which buckets the assigned elemnts must go

		# Now we can scatter the array and each process might decide the
		# corresponding bucket for each number that has
		assigned_numbers = comm.scatter(data, root = 0)
		# This array will have a tuple in each position in the form (num, #bucket)
		assignation = []
		# The corresponding bucket number is (num-min)//bucket_length with int
		min_num = buckets_info[0]
		bucket_len = buckets_info[1]

		for num in assigned_numbers:
			assignation.append((num, ((num - min_num) // bucket_len)))

		data = comm.gather(assignation, root = 0)

		#### 3. First node sends the corresponding elements for each bucket
		
		if (rank == 0):
			data = np.concatenate(data)
			temp = [[] for i in range(n)]
			for number, bucket_id in data:
				if (bucket_id == n):
					bucket_id = n-1
				temp[bucket_id].append(number)
			for i in range(n - 1):
				comm.send(temp[i + 1], dest = (i + 1), tag = 1)
			data = temp[0]
		else:
			data = []
			data = comm.recv(source = 0, tag = 1)

		#### 4. Each bucket sorts its elements
		
		data.sort()

		#### 5. First node collects the sorted elements, knowing that the next
		####    bucket always has greater numbers
		
		data = comm.gather(data, root = 0)
		if (comm.rank == 0):
			return np.concatenate(data).tolist()