Code example #1
File: main.py Project: plredmond-homework/irpy
def some_stats(corpus, docid, termv):
    doc = corpus.get_doc(docid)
    print(docid, "vocabulary {}:{} == {:.2}".format(doc.unique_len, doc.total_len, doc.unique_len / doc.total_len))
    # exclude terms which appear only in one document (names, twitter handles)
    termv = list(filter(lambda t: corpus.get_term(t).document_frequency > 1, termv))
    # function: return the top 10 terms sorted by a key function
    bykeyfun = lambda kf: sorted(zip(map(kf, termv), termv), reverse=True)[:10]
    # key functions
    tf = lambda stem: corpus.get_term(stem).term_frequency(docid)
    idf = lambda stem: corpus.get_term(stem).inverse_document_frequency
    tfidf = lambda stem: tf(stem) * idf(stem)
    # table spec
    cols = sorted(
        {
            "tf": tf,
            "idf": idf,
            "df": lambda stem: corpus.get_term(stem).document_frequency,
            "cf": lambda stem: corpus.get_term(stem).corpus_frequency,
            "tf*idf": tfidf,
        }.items()
    )
    # line format, heading
    hfmt = " | ".join(len(cols) * ["{:>6} {:<16}"])
    fmt = " | ".join(len(cols) * ["{:>6.4g} {!s:<16.16}"])
    print(hfmt.format(*util.flatten(zip([name for name, _ in cols], itertools.repeat("term")))))
    # data
    coldata = [bykeyfun(kf) for _, kf in cols]
    for row in zip(*coldata):
        print(fmt.format(*util.flatten(row)))
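All of these examples lean on a project-local flatten helper. For the header/row formatting above, a one-level flatten over the zipped (name, "term") pairs is enough. A minimal sketch of such a helper (an assumption; each project's util.flatten may differ, and some are fully recursive, see example #5):

import itertools

def flatten_once(nested):
    """Flatten exactly one level of nesting: [(1, 'a'), (2, 'b')] -> [1, 'a', 2, 'b']."""
    return list(itertools.chain.from_iterable(nested))

# e.g. the heading line above is equivalent to:
# print(hfmt.format(*flatten_once(zip([name for name, _ in cols], itertools.repeat("term")))))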
Code example #2
File: reducer.py Project: plum-umd/pasket
def reduce_anno_s(tmpl, cls, mtd, s):
  curried_e = partial(reduce_anno_e, tmpl, cls, mtd)
  curried_s = partial(reduce_anno_s, tmpl, cls, mtd)

  if s.kind in [C.S.EXP, C.S.ASSERT, C.S.RETURN]:
    red_e = curried_e(s.e)
    if type(red_e) is list: return red_e
    else: s.e = red_e

  elif s.kind == C.S.ASSIGN:
    s.le = curried_e(s.le)
    s.re = curried_e(s.re)

  elif s.kind == C.S.IF:
    s.e = curried_e(s.e)
    s.t = util.flatten(map(curried_s, s.t))
    s.f = util.flatten(map(curried_s, s.f))

  elif s.kind in [C.S.WHILE, C.S.REPEAT]:
    s.e = curried_e(s.e)
    s.b = util.flatten(map(curried_s, s.b))

  elif s.kind == C.S.FOR:
    s.i = curried_e(s.i)
    s.init = curried_e(s.init)
    s.b = util.flatten(map(curried_s, s.b))

  return [s]
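Each recursive reduce_anno_s call returns a list of statements, so mapping it over a block yields a list of lists; util.flatten is what turns that back into a flat statement block. A tiny illustration with statements stood in by strings (hypothetical values, not the pasket AST):

blocks = [["s1"], ["s2a", "s2b"], []]              # one sublist per original statement
flat = [stmt for chunk in blocks for stmt in chunk]
assert flat == ["s1", "s2a", "s2b"]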
Code example #3
 def getPlayerId(self, *args):
     """Get the id of the current player"""
     a = tuple(flatten(args))
     if self.playerId is not None and len(a) == 0:
         return self.playerId
     else:
         return int(self.conn.sendReceive_flat("world.getPlayerId", flatten(args)))
Code example #4
File: main.py Project: plredmond-homework/irpy
def group_by_sender(messages):
    """[Email] -> {str: [str]} : Associate lowercased email sender with a list of words."""
    wordssd = collections.defaultdict(list)
    for m in messages:
        words = util.flatten(map(str.split, m.lines))
        wordssd[m.sender.lower()].append(words)
    return {sender: util.flatten(wordss) for sender, wordss in wordssd.items()}
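A hypothetical usage sketch; the Email record below is an assumption standing in for whatever message type the irpy project uses (it only needs .sender and .lines):

import collections

Email = collections.namedtuple("Email", ["sender", "lines"])

msgs = [Email("Alice@example.com", ["hello world", "bye"]),
        Email("alice@example.com", ["again"])]
# group_by_sender(msgs) would return
# {"alice@example.com": ["hello", "world", "bye", "again"]}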
Code example #5
File: _test.py Project: nicLucian/pytis
 def test_flatten(self):
     def assertEqualListOrTuple(actual, expected):
         assert isinstance(expected, (list, tuple,)), "Test logic error"
         self.assertIsInstance(actual, (list, tuple,))
         self.assertSequenceEqual(actual, expected)
     assertEqualListOrTuple(util.flatten([]), [])
     assertEqualListOrTuple(util.flatten([[([])]]), [])
     assertEqualListOrTuple(util.flatten([[1,2],3,[[4]],[(5,[6,7],8)]]), [1,2,3,4,5,6,7,8])
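The test above pins down the semantics this project expects: flatten recurses into nested lists and tuples and returns a single flat sequence. A minimal implementation that would satisfy the assertions (a sketch, not necessarily the pytis version):

def flatten(seq):
    result = []
    for item in seq:
        if isinstance(item, (list, tuple)):
            result.extend(flatten(item))
        else:
            result.append(item)
    return result

assert flatten([[([])]]) == []
assert flatten([[1, 2], 3, [[4]], [(5, [6, 7], 8)]]) == [1, 2, 3, 4, 5, 6, 7, 8]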
Code example #6
def getFormulaIdsFromPars(pars, onlyTheorems):
    thmPars = None
    if onlyTheorems:
        thmPars = map(lambda x: x[1], filter(lambda par: re.search(r"thm", par[0]), pars.items()))
    else:
        thmPars = map(lambda x: x[1], pars.items())

    formulaTokens = filter(lambda token : token[:5] == "<fid ", flatten(flatten(thmPars)))

    return map(lambda token: token[5:-1], formulaTokens)
Code example #7
File: main.py Project: dwinkelman/EcoCartographer
def Intersections(pts, console):
    '''Returns a dictionary of Intersections with Connections, with strings as keys.'''
    intind = OriginalIntersections(pts)
    net = IntersectionsJoin(intind)
    trimmed = IntersectionsTrim(net)
    rejoined = IntersectionsJoin(trimmed)
    intersections = IntersectionsBuild(rejoined, pts)
    t0 = time.time()
    intsInRange = list(set(util.flatten(map(lambda a: a[0], OptimalDistance(intersections))))) #makes list of intersections within reasonable distance to start/end
    ultimate_trim = sorted(util.flatten(map(lambda a: intersections[a].references[0].references, intsInRange)))
    console.add('Intersections', error=': '+str(time.time()-t0))
    return IntersectionsBuild(IntersectionsJoin(ultimate_trim), pts)
Code example #8
File: pdf.py Project: mastbaum/tl208-residuals
def make(filenames, nprocs, cut):
    '''Create time residual PDF for a set of data files.

    Note: you may wish to use a smaller number of nprocs than you have CPUs;
    this function will almost certainly be I/O-bound.

    :param filenames: list of RAT ROOT files containing data
    :param cut: A Cut instance with cuts to apply to data
    :param nprocs: number of parallel jobs to run
    '''
    p = multiprocessing.Pool(nprocs)
    erf = ERF(cut=cut)

    res = np.array(list(util.flatten(p.map(erf, filenames))))

    print
    print len(res), 'entries'
    h, e = np.histogram(res, bins=750, range=(cut.t[0],cut.t[1]), normed=True)

    pdf = np.array(zip(e,h))

    print 'total events:', total_events.value
    print 'events reconstructed:', events_reconstructed.value
    print 'events passing cuts:', events_passing_cuts.value

    with open('event_counts.txt', 'a') as f:
        f.write('%s %s %s  %i  %i %i %i\n' % (str(cut.e), str(cut.r), str(cut.r),
                                            len(res),
                                            total_events.value,
                                            events_reconstructed.value,
                                            events_passing_cuts.value))

    return pdf
Code example #9
File: model.py Project: MichaelPaddon/volatility
    def __init__(self, past, future, features = None):
        """Create a training pattern.

        Parameters:
        past -- past feature vectors as a tensor of shape [P, V]
            where P is past days and V is the vectors/day
        future -- future feature vectors as a tensor of [F, V]
            where F is future days and V is the vectors/day
        features -- a sequence of feature names to use
            where None means use all features
        """

        # calculate training input from past features
        past_subfeatures = [[self._subfeatures(vector, features)
            for vector in vectors]
                for vectors in past]
        self._input = numpy.array(
            [list(util.flatten(vectors)) for vectors in past_subfeatures])

        # calculate training output from future volatility
        future_returns = numpy.log1p(
            [[vector.ret for vector in vectors] for vectors in future])
        self._output = numpy.std(future_returns, axis = 0, ddof = 1)\
            * numpy.sqrt(252)

        # calculate past returns for forecasts
        self._past_returns = numpy.log1p(
            [[vector.ret for vector in vectors] for vectors in past])
Code example #10
File: minimize.py Project: qq547276542/e2e-coref
  def finalize(self):
    merged_clusters = []
    for c1 in self.clusters.values():
      existing = None
      for m in c1:
        for c2 in merged_clusters:
          if m in c2:
            existing = c2
            break
        if existing is not None:
          break
      if existing is not None:
        print("Merging clusters (shouldn't happen very often.)")
        existing.update(c1)
      else:
        merged_clusters.append(set(c1))
    merged_clusters = [list(c) for c in merged_clusters]
    all_mentions = util.flatten(merged_clusters)
    assert len(all_mentions) == len(set(all_mentions))

    return {
      "doc_key": self.doc_key,
      "sentences": self.sentences,
      "speakers": self.speakers,
      "clusters": merged_clusters
    }
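The final assert only works because util.flatten collapses the clusters (lists of mention spans) into one flat list, so a mention shared by two clusters would show up as a duplicate. Illustration with made-up spans:

clusters = [[(0, 1), (4, 5)], [(7, 9)]]
mentions = [m for cluster in clusters for m in cluster]   # one-level flatten
assert len(mentions) == len(set(mentions))                # no span appears in two clusters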
Code example #11
def parsToFeatureCounts(pars, onlyTheorems):
    thmPars = None
    if onlyTheorems:
        thmPars = map(lambda x: x[1], filter(lambda par: re.search(r"thm", par[0]), pars.items()))
    else:
        thmPars = map(lambda x: x[1], pars.items())

    textTokenList = filter(lambda token: not(token[:5] == "<fid "), flatten(flatten(thmPars)))

    tokenCounts = {}
    for token in textTokenList:
        if token not in tokenCounts:
            tokenCounts[token] = 0
        tokenCounts[token] = tokenCounts[token] + 1

    return tokenCounts
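The manual counting loop above is equivalent to collections.Counter over the same token stream; a small equivalence check with illustrative tokens:

import collections

tokens = ["x", "y", "x", "x"]
counts = {}
for token in tokens:
    counts[token] = counts.get(token, 0) + 1
assert counts == dict(collections.Counter(tokens)) == {"x": 3, "y": 1}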
Code example #12
File: deduper.py Project: AnnuSachan/tweetmotif
def choose_multi_label(labels, lang_model):
  longest = util.argmax(labels, scorer=lambda ngram: len(ngram))
  if len(longest) > 3:
    
    best = util.argmax(bigrams.trigrams(longest), lambda ng: lang_model.lidstone(ng))
    best = (best,)
  elif len(longest) == 3:
    best = longest
    best = (best,)
  elif len(longest) <= 2:
    # this is kinda shitty set of them .. would rather want all possible skip n-grams (O(N^2) of them?)
    z = [(tuple(x),) for x in labels] + bigrams.bigrams(labels) + bigrams.trigrams(labels)
    assert z
    z = [x for x in z if len(util.flatten(x)) <= 3]
    # sum is too weird
    # lexicographic ordering of the top-ranked sublabels in the multilabel
    def scorer(ngrams):
      scores = [lang_model.lidstone(ng) for ng in ngrams]
      if len(scores) < 3:
        scores += [0]*(3 - len(scores))
      scores.sort(reverse=True)
      # print "SCORE %-30s %s" % (scores, ngrams)
      return scores
    z.sort(key= scorer, reverse=True)
    # print "RANKING",z
    best = z[0]
  else:
    assert False
  return best
Code example #13
File: lcd.py Project: N8body/pcd8544
def smooth_hscroll(string, row, iterations, delay=0.2, font=default_FONT):
    """ scrolls string at given row """
    bytes = list(flatten(map(lambda c: font[c] + [0x00], string)))
    for i in xrange(iterations):
        position(0, row)
        data(bytes[i:i+84])
        time.sleep(delay)
Code example #14
File: line.py Project: renemilk/slbot
 def min_value(self):
     if self.min_scale_value:
         return self.min_scale_value
     data = map(itemgetter("data"), self.data)
     if self.stacked:
         data = self.get_cumulative_data()
     return min(flatten(data))
Code example #15
File: fuf.py Project: Sandy4321/nltk_contrib
    def __init__(self, fsinput, fsgrammar, table=None):
        """
        Initialize and return the object.

        @param fsinput: The input feature structure
        @type fsinput: C{nltk.featstruct.FeatStruct}
        @param fsgrammar: The generation grammar
        @type fsgrammar: C{nltk.featstruct.FeatStruct}
        @param table: The feature value type table
        @type table: C{fstypes.FeatureTypeTable}
        """
        import copy
        self.fsinput = fsinput
        self.fsgrammar = fsgrammar
        self.table = table
        self.lr = LinkResolver()
        self.gpr = GrammarPathResolver(copy.deepcopy(fsgrammar), table)

        self.grammar_paths = flatten(self.gpr.resolve(copy.deepcopy(fsgrammar)))

        # the type table has been passed in
        # assign types to the feature values
        if table:
            for i, path in enumerate(self.grammar_paths):
                path = assign_types(table, path)
                self.grammar_paths[i] = path
Code example #16
File: api.py Project: nate-parrott/fast-news
def featured_sources_by_category(category=None):
    q = Source.query(Source.featured_priority < 1)
    if category: q = q.filter(Source.categories == category)
    q = q.order(Source.featured_priority)
    sources = q.fetch(400)

    categories = util.unique_ordered_list(util.flatten(s.categories for s in sources))
    if category and category not in categories: categories.append(category)
    
    category_order = {category: i for i, category in enumerate(["Newspapers", "Culture", "Politics", "Tech", "Humor", "Local", "Longform"])}
    categories.sort(key=lambda x: category_order.get(x, 99999))

    sources_by_category = defaultdict(list)
    for source in sources:
        for category in source.categories:
            sources_by_category[category].append(source)

    max_items_per_category = 60 if category else 15
    for category, items in sources_by_category.items():
        sources_by_category[category] = items[:min(len(items), max_items_per_category)]

    category_jsons = []
    for category in categories:
        category_jsons.append({"id": category, "name": category, "sources": [s.json() for s in sources_by_category[category]]})

    return category_jsons
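util.unique_ordered_list here is assumed to be an order-preserving dedupe over the flattened category lists; a minimal sketch of that helper (the fast-news implementation may differ):

def unique_ordered_list(items):
    seen = set()
    ordered = []
    for item in items:
        if item not in seen:
            seen.add(item)
            ordered.append(item)
    return ordered

assert unique_ordered_list(["Tech", "Politics", "Tech", "Humor"]) == ["Tech", "Politics", "Humor"]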
Code example #17
File: coref_model.py Project: qq547276542/e2e-coref
  def tensorize_example(self, example, is_training, oov_counts=None):
    clusters = example["clusters"]

    gold_mentions = sorted(tuple(m) for m in util.flatten(clusters))
    gold_mention_map = {m:i for i,m in enumerate(gold_mentions)}
    cluster_ids = np.zeros(len(gold_mentions))
    for cluster_id, cluster in enumerate(clusters):
      for mention in cluster:
        cluster_ids[gold_mention_map[tuple(mention)]] = cluster_id

    sentences = example["sentences"]
    num_words = sum(len(s) for s in sentences)
    speakers = util.flatten(example["speakers"])

    assert num_words == len(speakers)

    max_sentence_length = max(len(s) for s in sentences)
    max_word_length = max(max(max(len(w) for w in s) for s in sentences), max(self.config["filter_widths"]))
    word_emb = np.zeros([len(sentences), max_sentence_length, self.embedding_size])
    char_index = np.zeros([len(sentences), max_sentence_length, max_word_length])
    text_len = np.array([len(s) for s in sentences])
    for i, sentence in enumerate(sentences):
      for j, word in enumerate(sentence):
        current_dim = 0
        for k, (d, (s,l)) in enumerate(zip(self.embedding_dicts, self.embedding_info)):
          if l:
            current_word = word.lower()
          else:
            current_word = word
          if oov_counts is not None and current_word not in d:
            oov_counts[k] += 1
          word_emb[i, j, current_dim:current_dim + s] = util.normalize(d[current_word])
          current_dim += s
        char_index[i, j, :len(word)] = [self.char_dict[c] for c in word]

    speaker_dict = { s:i for i,s in enumerate(set(speakers)) }
    speaker_ids = np.array([speaker_dict[s] for s in speakers])

    doc_key = example["doc_key"]
    genre = self.genres[doc_key[:2]]

    gold_starts, gold_ends = self.tensorize_mentions(gold_mentions)

    if is_training and len(sentences) > self.config["max_training_sentences"]:
      return self.truncate_example(word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids)
    else:
      return word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids
Code example #18
 def fallbackGetCuboid(self, getBlock, *args):
     (x0,y0,z0,x1,y1,z1) = map(lambda x:int(math.floor(float(x))), flatten(args))
     out = []
     for y in range(min(y0,y1),max(y0,y1)+1):
         for x in range(min(x0,x1),max(x0,x1)+1):
             for z in range(min(z0,z1),max(z0,z1)+1):
                 out.append(getBlock(x,y,z))                    
     return out
Code example #19
	def create_tfidf_vector(self):
		count_vect = CountVectorizer()
		doc = map(lambda x: " ".join(flatten(x)) + " " + \
				x[0], self.goal_actions_map.items())
		X_train_counts = count_vect.fit_transform(doc)
		tfidf_transformer = TfidfTransformer()
		X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
		return X_train_tfidf
Code example #20
File: reducer.py Project: plum-umd/pasket
def reduce_anno(smpls, tmpl):
  for cls in util.flatten_classes(tmpl.classes, "inners"):
    for fld in cls.flds:
      reduce_anno_fld(smpls, tmpl, cls, fld)
    for mtd in cls.mtds:
      reduce_anno_mtd(smpls, tmpl, cls, mtd)
      red_s = map(partial(reduce_anno_s, tmpl, cls, mtd), mtd.body)
      mtd.body = util.flatten(red_s)
Code example #21
def test_helper2():
    statement = GeneralizedOr(Symbol("a"), Symbol("a"))
    # need to manually set it to this as otherwise the constructor would flatten it automatically
    statement.args[0] = Or(And(Symbol("b"), Not(Symbol("c"))), And(Symbol("c"), Not(Symbol("b"))))
    new_statement, change = util.flatten(copy.deepcopy(statement))
    assert_equal(new_statement, GeneralizedOr(Symbol("a"), And(Symbol("b"), Not(Symbol("c"))),
                                              And(Symbol("c"), Not(Symbol("b")))))
    assert_true(change)
Code example #22
 def get_date(self):
     self._parse_doc()
     for page_nr in xrange(len(self._pages)):
         page_text = '\n'.join([x for x in flatten(self._pages[page_nr]) if isinstance(x,unicode)])
         m = re.search('\d{1,2}\.\d{1,2}\.\d{4}',page_text)
         try:
             return datetime.datetime.strptime(m.group(0),'%d.%m.%Y').date()
         except AttributeError:
             pass
     return None
Code example #23
File: ranking.py Project: AnnuSachan/tweetmotif
def query_refinement(orig_q, topic):
  if topic.ngram == ("**EXTRAS**",): return None
  subquery = topic.label.replace("/ ","")
  if any(AllJunkLike.match(term) for term in util.flatten(topic.label_ngrams)):
    # then twitter phrase search will drop that token. at least emoticons.
    # so fallback to non-phrase search
    pass
  elif len(subquery.split()) > 1:
    subquery = '"%s"' % subquery
  return orig_q + " " + subquery
Code example #24
def stat3(datum=None):
    if not datum:
        datum = time.mktime(datetime.date.today().timetuple())
    response.set_header('Access-Control-Allow-Origin', '*')
    response.content_type = 'application/json'
    return json.dumps(
            util.flatten(finalna_verzija.StatPresloNaDrugu(
                time.mktime(datetime.datetime.strptime(datum, "%m-%d-%Y").timetuple())
                ,db)
            ))
Code example #25
File: serv.py Project: davidhoward/aspdb
 def extract_request(self):
     path = self.request.path
     logging.debug("path = %s" % path)
     query = self.request.query_string
     specname = path.split('/')[1]
     logging.debug("specname = %s" % specname)
     d = urlparse.parse_qs(query)
     d = util.flatten(d)
     dd = parse_proper(d)
     return specname, dd
Code example #26
File: depgraph.py Project: pbiggar/gin
  def node_roots(self, n):
    """Returns the set of roots which reach N."""
    result = []
    deps = self.dependencies(n)

    # Base case
    if len(deps) == 0:
      return [n]

    # Recursive case
    return util.flatten([self.node_roots(d) for d in deps])
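Because node_roots returns a list per dependency, util.flatten merges the per-dependency root lists into one. The same idea on a plain dict graph (a standalone sketch; the real DepGraph API differs):

def node_roots(graph, n):
    deps = graph.get(n, [])
    if not deps:
        return [n]                                      # a node with no dependencies is a root
    return [r for d in deps for r in node_roots(graph, d)]

assert node_roots({"x": ["a", "b"], "a": [], "b": []}, "x") == ["a", "b"]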
Code example #27
File: textplan.py Project: DrDub/pypolibox
def __bottom_up_search(messages, rules):
    """generate_text() helper method which performs recursive best-first-search

    :param messages: a set containing ``Message``s and/or ``ConstituentSet``s
    :type messages: ``set`` of ``Message``s or ``ConstituentSet``s
    
    :param rules: a list of ``Rule``s specifying relationships which can hold 
    between the messages
    :type rules: ``list`` of ``Rule``s
        
    :return: a set containing one ``Message``, i.e. the first valid plan reached
    by best-first-search. returns None if no valid plan is found.
    :rtype: ``NoneType`` or a ``set`` of (``Message``s or ``ConstituentSet``s)
    """
    if len(messages) == 1:
        return messages
    elif len(messages) < 1:
        raise Exception('Error: Input contains no messages.')
    else:
        try:
            options = [rule.get_options(messages) for rule in rules]
        except:
            raise Exception('ERROR: Rule {0} had trouble with these ' \
                            'messages: {1}'.format(rule, messages))
            
        options = flatten(options)
        options_list = []
        for x, y, z in options:
            y.freeze()
            options_list.append( (x, y, z) )
            
        if options_list == []:
            return None

        #sort all options by their score, beginning with the highest one
        sorted_options = sorted(options_list, key = lambda (x,y,z): x, 
                                reverse=True) 
                                
        for (score, rst_relation, removes) in sorted_options:
            """
            rst_relation: a ConstituentSet (RST relation) that was generated by
                Rule.get_options()
            removes: a list containing those messages that are now part of 
                'rst_relation' and should therefore not be used again
            """
            testSet = messages - set(removes)
            testSet = testSet.union(set([rst_relation]))
            # a set containing a ConstituentSet and one or more Messages that 
            # haven't been integrated into a structure yet

            ret = __bottom_up_search(testSet, rules)
            if ret:
                return ret
        return None
Code example #28
File: model.py Project: whipermr5/countmeinbot
 def generate_respondents_summary(self):
     all_uids_by_option = [option.people.keys() for option in self.options]
     all_uids = util.flatten(all_uids_by_option)
     num_respondents = len(set(all_uids))
     if num_respondents == 0:
         output = 'Nobody responded'
     elif num_respondents == 1:
         output = '1 person responded'
     else:
         output = '{} people responded'.format(num_respondents)
     return output
Code example #29
 def test_import_similar_lobbyists(self):
     """Slightly different lobbyists are inserted into different rows."""
     filings = list(lobbyists.parse_filings(util.testpath('lobbyists_slightly_different.xml')))
     con = sqlite3.connect(':memory:')
     con = lobbyists.create_db(con)
     cur = con.cursor()
     self.failUnless(lobbyists.import_filings(cur, filings))
     cur = con.cursor()
     cur.execute('SELECT id FROM lobbyist')
     lobbyers = util.flatten([x['lobbyists'] for x in filings if 'lobbyists' in x])
     self.failUnlessEqual(len(cur.fetchall()), len(lobbyers))
Code example #30
File: log.py Project: bhumish/Taboot
    def orig_func_wraper(msg, *args):
        # Take the callers name and snap it in two, result is log
        # level, e.g.: log_debug is DEBUG level.
        log_level = origfunc.__name__.split("_")[1]

        import log
        if getattr(log, "LOG_%s" % log_level.upper()) <= \
                log.LOG_LEVEL_CURRENT:
            # flatten and stringify the positional params so we don't
            # tuple() a tuple or an array and end up with
            # weirdness.
            a = map(str, util.flatten(args))
            print_log_msg(log_level, str(msg) % tuple(a))
Code example #31
def model_fn(features, labels, mode, params):
    x = tf.reshape(features, [-1, 99, 161, 1], name='input_incep8')
    x_norm = tf.layers.batch_normalization(
        x, training=mode == tf.estimator.ModeKeys.TRAIN, name='x_norm')
    if params['verbose_summary']:
        tf.summary.image('input', x)

    conv1 = tf.layers.conv2d(x_norm,
                             filters=16,
                             kernel_size=3,
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv1')
    conv1b = tf.layers.conv2d(conv1,
                              filters=16,
                              kernel_size=3,
                              activation=tf.nn.relu,
                              name='conv1b')
    pool1 = tf.layers.max_pooling2d(conv1b,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool1')
    if params['verbose_summary']:
        log_conv_kernel('conv1')
        log_conv_kernel('conv1b')
        tf.summary.image('pool1', pool1[:, :, :, 0:1])

    incep2 = inception_block(pool1,
                             t1x1=8,
                             t3x3=8,
                             t5x5=8,
                             tmp=8,
                             name='incep2')

    conv3 = tf.layers.conv2d(incep2,
                             filters=32,
                             kernel_size=3,
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv3')
    conv3b = tf.layers.conv2d(conv3,
                              filters=32,
                              kernel_size=3,
                              activation=tf.nn.relu,
                              name='conv3b')
    pool3 = tf.layers.max_pooling2d(conv3b,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool3')
    if params['verbose_summary']:
        log_conv_kernel('conv3')
        log_conv_kernel('conv3b')
        tf.summary.image('pool3', pool3[:, :, :, 0:1])

    conv5 = tf.layers.conv2d(pool3,
                             filters=64,
                             kernel_size=3,
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv5')
    conv5b = tf.layers.conv2d(conv5,
                              filters=64,
                              kernel_size=3,
                              activation=tf.nn.relu,
                              name='conv5b')
    pool5 = tf.layers.max_pooling2d(conv5b,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool5')
    if params['verbose_summary']:
        log_conv_kernel('conv5')
        log_conv_kernel('conv5b')
        tf.summary.image('pool5', pool5[:, :, :, 0:1])

    incep6 = inception_block(pool5,
                             t1x1=32,
                             t3x3=32,
                             t5x5=32,
                             tmp=32,
                             name='incep6')

    conv7 = tf.layers.conv2d(incep6,
                             filters=128,
                             kernel_size=3,
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv7')
    conv7b = tf.layers.conv2d(conv7,
                              filters=128,
                              kernel_size=3,
                              activation=tf.nn.relu,
                              name='conv7b')
    pool7 = tf.layers.max_pooling2d(conv7b,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool7')
    if params['verbose_summary']:
        log_conv_kernel('conv7')
        log_conv_kernel('conv7b')
        tf.summary.image('pool7', pool7[:, :, :, 0:1])

    incep8 = inception_block(pool7,
                             t1x1=64,
                             t3x3=64,
                             t5x5=64,
                             tmp=64,
                             name='incep8')

    conv9 = tf.layers.conv2d(incep8,
                             filters=256,
                             kernel_size=3,
                             padding='same',
                             activation=tf.nn.relu,
                             name='conv9')
    conv9b = tf.layers.conv2d(conv9,
                              filters=256,
                              kernel_size=3,
                              activation=tf.nn.relu,
                              name='conv9b')
    pool9 = tf.layers.max_pooling2d(conv9b,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool9')
    if params['verbose_summary']:
        log_conv_kernel('conv9')
        log_conv_kernel('conv9b')
        tf.summary.image('pool9', pool9[:, :, :, 0:1])

    flat = flatten(pool9)
    dropout4 = tf.layers.dropout(flat,
                                 rate=params['dropout_rate'],
                                 training=mode == tf.estimator.ModeKeys.TRAIN,
                                 name='dropout4')
    dense4 = tf.layers.dense(dropout4,
                             units=2048,
                             activation=tf.nn.relu,
                             name='dense4')

    logits = tf.layers.dense(dense4,
                             units=params['num_classes'],
                             name='logits')

    predictions = {
        'classes': tf.argmax(logits, axis=1, name='prediction_classes'),
        'probabilities': tf.nn.softmax(logits, name='prediction_softmax')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={'predictions': predictions['probabilities']})

    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32),
                               depth=params['num_classes'],
                               name='onehot_labels')
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits)
    tf.summary.scalar('loss', loss)

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=params['learning_rate'])
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
    eval_metric_ops = {
        'accuracy':
        tf.metrics.accuracy(labels=labels, predictions=predictions['classes'])
    }

    tf.summary.scalar('accuracy', eval_metric_ops['accuracy'][1])

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
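The flatten(pool9) call before the dense layer is assumed to be a tensor flatten that collapses the spatial and channel dimensions of a [batch, H, W, C] activation into [batch, H*W*C]. NumPy illustration of the same reshape:

import numpy as np

activations = np.zeros((8, 3, 5, 16))                 # [batch, H, W, C]
flat = activations.reshape(activations.shape[0], -1)  # [batch, H * W * C]
assert flat.shape == (8, 3 * 5 * 16)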
Code example #32
def home():
    commands = ['G28 X Y', 'G28 Z']
    return flatten(commands)
Code example #33
def get(thing):
    commands = [lift(50), goto(thing), lower(), lift(43), lower(), lift()]
    return flatten(commands)
Code example #34
def get_inputs(script):
    inputs = deque([convert_to_ascii(cmd) for cmd in script])
    inputs = deque(util.flatten(inputs))
    return inputs
Code example #35
def find(smpls, what, cond):
    lst = map(op.methodcaller("find", what, cond), smpls)
    return set(util.flatten(lst))
Code example #36
File: flatv1.py Project: formigone/ml-engine
def model_fn(features, labels, mode, params):
    x = tf.reshape(features, [-1, 125, 128, 1], name='input_flatv1')
    x_flat = tf.reshape(features, [-1, 16000])
    x_norm = tf.layers.batch_normalization(
        x, training=mode == tf.estimator.ModeKeys.TRAIN, name='x_norm')
    if params['verbose_summary']:
        tf.summary.image('input', x)
        tf.summary.audio('input', x_flat, 16000)

    conv1 = tf.layers.conv2d(x_norm,
                             filters=16,
                             kernel_size=3,
                             activation=tf.nn.relu,
                             name='conv1')
    conv2 = tf.layers.conv2d(conv1,
                             filters=32,
                             kernel_size=3,
                             activation=tf.nn.relu,
                             name='conv2')
    conv3 = tf.layers.conv2d(conv2,
                             filters=64,
                             kernel_size=3,
                             activation=tf.nn.relu,
                             name='conv3')
    pool3 = tf.layers.max_pooling2d(conv3,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool3')
    if params['verbose_summary']:
        for i in range(1, 4):
            label = 'conv{}'.format(i)
            graph_utils.log_conv_kernel(label)
            tf.summary.image(label, tf.expand_dims(conv1[..., 0], -1))
        tf.summary.image('pool3', pool3[:, :, :, 0:1])

    conv4 = tf.layers.conv2d(pool3,
                             filters=128,
                             kernel_size=3,
                             activation=tf.nn.relu,
                             name='conv4')
    conv5 = tf.layers.conv2d(conv4,
                             filters=256,
                             kernel_size=3,
                             activation=tf.nn.relu,
                             name='conv5')
    conv6 = tf.layers.conv2d(conv5,
                             filters=512,
                             kernel_size=3,
                             activation=tf.nn.relu,
                             name='conv6')
    pool6 = tf.layers.max_pooling2d(conv6,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool6')
    if params['verbose_summary']:
        for i in range(4, 7):
            label = 'conv{}'.format(i)
            graph_utils.log_conv_kernel(label)
            tf.summary.image(label, tf.expand_dims(conv1[..., 0], -1))
        tf.summary.image('pool6', pool6[:, :, :, 0:1])

    conv7 = tf.layers.conv2d(pool6,
                             filters=1024,
                             kernel_size=3,
                             activation=tf.nn.relu,
                             name='conv7')
    conv8 = tf.layers.conv2d(conv7,
                             filters=1024,
                             kernel_size=5,
                             activation=tf.nn.relu,
                             name='conv8')
    conv9 = tf.layers.conv2d(conv8,
                             filters=1024,
                             kernel_size=7,
                             activation=tf.nn.relu,
                             name='conv9')
    pool9 = tf.layers.max_pooling2d(conv9,
                                    pool_size=[2, 2],
                                    strides=2,
                                    name='pool9')
    if params['verbose_summary']:
        for i in range(7, 10):
            label = 'conv{}'.format(i)
            graph_utils.log_conv_kernel(label)
            tf.summary.image(label, tf.expand_dims(conv1[..., 0], -1))
        tf.summary.image('pool9', pool9[:, :, :, 0:1])

    conv10 = tf.layers.conv2d(pool9,
                              filters=512,
                              kernel_size=1,
                              activation=tf.nn.relu,
                              name='conv10')
    conv11 = tf.layers.conv2d(conv10,
                              filters=512,
                              kernel_size=1,
                              activation=tf.nn.relu,
                              name='conv11')
    conv12 = tf.layers.conv2d(conv11,
                              filters=512,
                              kernel_size=1,
                              activation=tf.nn.relu,
                              name='conv12')

    flat = flatten(conv12)
    dense = tf.layers.dense(flat,
                            units=1024,
                            activation=tf.nn.relu,
                            name='dense')
    dropout = tf.layers.dropout(dense,
                                rate=params['dropout_rate'],
                                training=mode == tf.estimator.ModeKeys.TRAIN,
                                name='dropout')

    logits = tf.layers.dense(dropout, units=12, name='logits')

    predictions = {
        'classes': tf.argmax(logits, axis=1, name='prediction_classes'),
        'probabilities': tf.nn.softmax(logits, name='prediction_softmax')
    }

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={'predictions': predictions['probabilities']})

    onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32),
                               depth=12,
                               name='onehot_labels')
    loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels,
                                           logits=logits)
    tf.summary.scalar('loss', loss)

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=params['learning_rate'])
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss=loss,
                                      global_step=tf.train.get_global_step())
    eval_metric_ops = {
        'accuracy':
        tf.metrics.accuracy(labels=labels, predictions=predictions['classes'])
    }

    tf.summary.scalar('accuracy', eval_metric_ops['accuracy'][1])

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      train_op=train_op,
                                      eval_metric_ops=eval_metric_ops)
Code example #37
def run(data_file, is_train=False, **args):
    is_test = not is_train
    batchsize = args['batchsize']
    model_name = args['model_name']
    optimizer_name = args['optimizer']
    save_dir = args['save_dir']
    print args
    if save_dir[-1] != '/':
        save_dir = save_dir + '/'

    # TODO: check save_dir exist
    if not os.path.isdir(save_dir):
        err_msg = 'There is no dir : {}\n'.format(save_dir)
        err_msg += '##############################\n'
        err_msg += '## Please followiing: \n'
        err_msg += '## $ mkdir {}\n'.format(save_dir)
        err_msg += '##############################\n'
        raise ValueError(err_msg)

    save_name = args['save_name']
    if save_name == '':
        save_name = '_'.join([model_name, optimizer_name])

    save_name = save_dir + save_name

    xp = cuda.cupy if args['gpu'] >= 0 else np
    if args['gpu'] >= 0:
        cuda.get_device(args['gpu']).use()
        xp.random.seed(1234)

    # load files
    dev_file = args['dev_file']
    test_file = args['test_file']
    delimiter = args['delimiter']
    sentences_train = []
    if is_train:
        sentences_train = util.read_conll_file(filename=data_file,
                                               delimiter=delimiter,
                                               input_idx=0,
                                               output_idx=-1)
        if len(sentences_train) == 0:
            s = str(len(sentences_train))
            err_msg = 'Invalid training sizes: {} sentences. '.format(s)
            raise ValueError(err_msg)
    else:
        # Predict
        sentences_train = util.read_raw_file(filename=data_file,
                                             delimiter=u' ')

    # sentences_train = sentences_train[:100]

    sentences_dev = []
    sentences_test = []
    if dev_file:
        sentences_dev = util.read_conll_file(dev_file,
                                             delimiter=delimiter,
                                             input_idx=0,
                                             output_idx=-1)
    if test_file:
        sentences_test = util.read_conll_file(test_file,
                                              delimiter=delimiter,
                                              input_idx=0,
                                              output_idx=-1)

    save_vocab = save_name + '.vocab'
    save_vocab_char = save_name + '.vocab_char'
    save_tags_vocab = save_name + '.vocab_tag'
    save_train_config = save_name + '.train_config'

    # TODO: check unkown pos tags
    # TODO: compute unk words
    if is_train:
        sentences_words_train = [w_obj[0] for w_obj in sentences_train]
        vocab = util.build_vocab(sentences_words_train)
        vocab_char = util.build_vocab(util.flatten(sentences_words_train))
        vocab_tags = util.build_tag_vocab(sentences_train)
    elif is_test:
        vocab = util.load_vocab(save_vocab)
        vocab_char = util.load_vocab(save_vocab_char)
        vocab_tags = util.load_vocab(save_tags_vocab)

    PAD_IDX = vocab[PADDING]
    UNK_IDX = vocab[UNKWORD]

    CHAR_PAD_IDX = vocab_char[PADDING]
    CHAR_UNK_IDX = vocab_char[UNKWORD]

    def parse_to_word_ids(sentences):
        return util.parse_to_word_ids(sentences,
                                      xp=xp,
                                      vocab=vocab,
                                      UNK_IDX=UNK_IDX,
                                      idx=0)

    def parse_to_char_ids(sentences):
        return util.parse_to_char_ids(sentences,
                                      xp=xp,
                                      vocab_char=vocab_char,
                                      UNK_IDX=CHAR_UNK_IDX,
                                      idx=0)

    def parse_to_tag_ids(sentences):
        return util.parse_to_tag_ids(sentences,
                                     xp=xp,
                                     vocab=vocab_tags,
                                     UNK_IDX=-1,
                                     idx=-1)

    # if is_train:
    x_train = parse_to_word_ids(sentences_train)
    x_char_train = parse_to_char_ids(sentences_train)
    y_train = parse_to_tag_ids(sentences_train)

    # elif is_test:
    #     x_predict = parse_to_word_ids(sentences_predict)
    #     x_char_predict = parse_to_char_ids(sentences_predict)
    #     y_predict = parse_to_tag_ids(sentences_predict)

    x_dev = parse_to_word_ids(sentences_dev)
    x_char_dev = parse_to_char_ids(sentences_dev)
    y_dev = parse_to_tag_ids(sentences_dev)

    x_test = parse_to_word_ids(sentences_test)
    x_char_test = parse_to_char_ids(sentences_test)
    y_test = parse_to_tag_ids(sentences_test)

    cnt_train_unk = sum([xp.sum(d == UNK_IDX) for d in x_train])
    cnt_train_word = sum([d.size for d in x_train])
    unk_train_unk_rate = float(cnt_train_unk) / cnt_train_word

    cnt_dev_unk = sum([xp.sum(d == UNK_IDX) for d in x_dev])
    cnt_dev_word = sum([d.size for d in x_dev])
    unk_dev_unk_rate = float(cnt_dev_unk) / max(cnt_dev_word, 1)

    logging.info('train:' + str(len(x_train)))
    logging.info('dev  :' + str(len(x_dev)))
    logging.info('test :' + str(len(x_test)))
    logging.info('vocab     :' + str(len(vocab)))
    logging.info('vocab_tags:' + str(len(vocab_tags)))
    logging.info('unk count (train):' + str(cnt_train_unk))
    logging.info('unk rate  (train):' + str(unk_train_unk_rate))
    logging.info('cnt all words (train):' + str(cnt_train_word))
    logging.info('unk count (dev):' + str(cnt_dev_unk))
    logging.info('unk rate  (dev):' + str(unk_dev_unk_rate))
    logging.info('cnt all words (dev):' + str(cnt_dev_word))
    # show model config
    logging.info('######################')
    logging.info('## Model Config')
    logging.info('model_name:' + str(model_name))
    logging.info('batchsize:' + str(batchsize))
    logging.info('optimizer:' + str(optimizer_name))
    # Save model config
    logging.info('######################')
    logging.info('## Model Save Config')
    logging.info('save_dir :' + str(save_dir))

    # save vocab
    logging.info('save_vocab        :' + save_vocab)
    logging.info('save_vocab_char   :' + save_vocab_char)
    logging.info('save_tags_vocab   :' + save_tags_vocab)
    logging.info('save_train_config :' + save_train_config)
    util.write_vocab(save_vocab, vocab)
    util.write_vocab(save_vocab_char, vocab_char)
    util.write_vocab(save_tags_vocab, vocab_tags)
    util.write_vocab(save_train_config, args)

    net = BiLSTM_CNN_CRF(n_vocab=len(vocab),
                         n_char_vocab=len(vocab_char),
                         emb_dim=args['n_word_emb'],
                         hidden_dim=args['n_hidden'],
                         n_layers=args['n_layer'],
                         init_emb=None,
                         n_label=len(vocab_tags))

    if args['word_emb_file']:
        # set Pre-trained embeddings
        # emb_file = './emb/glove.6B.100d.txt'
        emb_file = args['word_emb_file']
        word_ids, word_vecs = util.load_glove_embedding(emb_file, vocab)
        net.word_embed.W.data[word_ids] = word_vecs

    if args['gpu'] >= 0:
        net.to_gpu()

    init_alpha = args['init_lr']
    if optimizer_name == 'adam':
        opt = optimizers.Adam(alpha=init_alpha, beta1=0.9, beta2=0.9)
    elif optimizer_name == 'adadelta':
        opt = optimizers.AdaDelta()
    if optimizer_name == 'sgd_mom':
        opt = optimizers.MomentumSGD(lr=init_alpha, momentum=0.9)
    if optimizer_name == 'sgd':
        opt = optimizers.SGD(lr=init_alpha)

    opt.setup(net)
    opt.add_hook(chainer.optimizer.GradientClipping(5.0))

    def eval_loop(x_data, x_char_data, y_data):
        # dev or test
        net.set_train(train=False)
        iteration_list = range(0, len(x_data), batchsize)
        perm = np.random.permutation(len(x_data))
        sum_loss = 0.0
        predict_lists = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_data[i], x_char_data[i], y_data[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)

            sum_loss += loss.data
            predict_lists.extend(predict)
        return predict_lists, sum_loss

    if is_test:
        # predict
        model_filename = args['model_filename']
        model_filename = save_dir + model_filename
        serializers.load_hdf5(model_filename, net)

        vocab_tags_inv = dict([(v, k) for k, v in vocab_tags.items()])
        x_predict = x_train
        x_char_predict = x_char_train
        y_predict = y_train
        predict_pairs, _ = eval_loop(x_predict, x_char_predict, y_predict)
        _, predict_tags = zip(*predict_pairs)
        for predict in predict_tags:
            predict = [vocab_tags_inv[tag_idx] for tag_idx in to_cpu(predict)]
            print predict

        return False

    tmax = args['max_iter']
    t = 0.0
    for epoch in xrange(args['max_iter']):

        # train
        net.set_train(train=True)
        iteration_list = range(0, len(x_train), batchsize)
        perm = np.random.permutation(len(x_train))
        sum_loss = 0.0
        predict_train = []
        for i_index, index in enumerate(iteration_list):
            data = [(x_train[i], x_char_train[i], y_train[i])
                    for i in perm[index:index + batchsize]]
            x, x_char, target_y = zip(*data)

            output = net(x_data=x, x_char_data=x_char)
            predict, loss = net.predict(output, target_y)

            # loss
            sum_loss += loss.data

            # update
            net.zerograds()
            loss.backward()
            opt.update()

            predict_train.extend(predict)

        # Evaluation
        train_accuracy = util.eval_accuracy(predict_train)

        logging.info('epoch:' + str(epoch))
        logging.info(' [train]')
        logging.info('  loss     :' + str(sum_loss))
        logging.info('  accuracy :' + str(train_accuracy))

        # Dev
        predict_dev, loss_dev = eval_loop(x_dev, x_char_dev, y_dev)

        # Evaluation
        dev_accuracy = util.eval_accuracy(predict_dev)
        logging.info(' [dev]')
        logging.info('  loss     :' + str(loss_dev))
        logging.info('  accuracy :' + str(dev_accuracy))

        # Save model
        model_filename = save_name + '_epoch' + str(epoch)
        serializers.save_hdf5(model_filename + '.model', net)
        serializers.save_hdf5(model_filename + '.state', opt)
Code example #38
    def finalize(self):
        # finalized: segments, segment_subtoken_map
        # populate speakers from info
        subtoken_idx = 0
        for segment in self.segment_info:
            speakers = []
            for i, tok_info in enumerate(segment):
                if tok_info is None and (i == 0 or i == len(segment) - 1):
                    speakers.append('[SPL]')
                elif tok_info is None:
                    speakers.append(speakers[-1])
                else:
                    speakers.append(tok_info[9])
                    if tok_info[4] == 'PRP':
                        self.pronouns.append(subtoken_idx)
                subtoken_idx += 1
            self.speakers += [speakers]
        # populate sentence map

        # populate clusters
        first_subtoken_index = -1
        for seg_idx, segment in enumerate(self.segment_info):
            speakers = []
            for i, tok_info in enumerate(segment):
                first_subtoken_index += 1
                coref = tok_info[-2] if tok_info is not None else '-'
                if coref != "-":
                    last_subtoken_index = first_subtoken_index + tok_info[
                        -1] - 1
                    for part in coref.split("|"):
                        if part[0] == "(":
                            if part[-1] == ")":
                                cluster_id = int(part[1:-1])
                                self.clusters[cluster_id].append(
                                    (first_subtoken_index,
                                     last_subtoken_index))
                            else:
                                cluster_id = int(part[1:])
                                self.coref_stacks[cluster_id].append(
                                    first_subtoken_index)
                        else:
                            cluster_id = int(part[:-1])
                            start = self.coref_stacks[cluster_id].pop()
                            self.clusters[cluster_id].append(
                                (start, last_subtoken_index))
        # merge clusters
        merged_clusters = []
        for c1 in self.clusters.values():
            existing = None
            for m in c1:
                for c2 in merged_clusters:
                    if m in c2:
                        existing = c2
                        break
                if existing is not None:
                    break
            if existing is not None:
                print("Merging clusters (shouldn't happen very often.)")
                existing.update(c1)
            else:
                merged_clusters.append(set(c1))
        merged_clusters = [list(c) for c in merged_clusters]
        all_mentions = util.flatten(merged_clusters)
        sentence_map = get_sentence_map(self.segments, self.sentence_end)
        subtoken_map = util.flatten(self.segment_subtoken_map)
        assert len(all_mentions) == len(set(all_mentions))
        num_words = len(util.flatten(self.segments))
        assert num_words == len(util.flatten(self.speakers))
        assert num_words == len(subtoken_map), (num_words, len(subtoken_map))
        assert num_words == len(sentence_map), (num_words, len(sentence_map))
        return {
            "doc_key": self.doc_key,
            "sentences": self.segments,
            "speakers": self.speakers,
            "constituents": [],
            "ner": [],
            "clusters": merged_clusters,
            'sentence_map': sentence_map,
            "subtoken_map": subtoken_map,
            'pronouns': self.pronouns
        }
Code example #39
def goto(thing):
    if thing not in thing_locations:
        raise (Exception('I don\'t know where {} is'.format(thing)))
    location = thing_locations[thing]
    commands = ['G1 X{} Y{} F3600'.format(location['X'], location['Y'])]
    return flatten(commands)
Code example #40
File: filtering.py Project: zhangzhensong/dblp
 def papers_file(self):
     for file_obj in util.flatten(self.input()):
         if 'paper' in file_obj.path:
             return file_obj
Code example #41
            end = file_mapping[span[1]][1]
            temp += [[start, end]]
        bert_clusters += [temp]
    example['sentences'] = bert_tokenized
    example['clusters'] = bert_clusters
    # json.dump(example, fp)
    train += [example]



with open('test_english.pickle', 'wb') as handle:
    pickle.dump(train, handle, protocol=pickle.HIGHEST_PROTOCOL)



gold_mentions = sorted(tuple(m) for m in util.flatten(clusters))
gold_mention_map = {m:i for i,m in enumerate(gold_mentions)}

bert_cluster = train[-1]['clusters']
bert_tokenized = train[-1]['sentences']
bert_tokens = []
for bert_token in bert_tokenized:
    bert_tokens += bert_token

bert_mentions = sorted(tuple(m) for m in util.flatten(bert_cluster))
bert_gold_mention_map = {m: i for i, m in enumerate(bert_mentions)}


for i in range(len(gold_mentions)):
    span = gold_mentions[i]
Code example #42
        smpl_files.extend(util.get_files_from_path(arg, "txt"))

    reset()
    smpls = []
    for fname in smpl_files:
        smpl = Sample(fname, lambda mname: mname.endswith("Event"))
        smpls.append(smpl)

    if opt.method:
        _decls = decls(smpls)
        for cname in _decls.keys():
            mnames = ", ".join(list(_decls[cname]))
            print "{}: {}".format(cname, mnames)

    if opt.event:
        _evts = util.flatten(map(op.attrgetter("evts"), smpls))
        for evt in _evts:
            print str(evt)

    if opt.obj:
        print "# max: {}\n".format(max_objs(smpls))
        _objs = objs(smpls)
        for cname in _objs.keys():
            instances = ", ".join(_objs[cname])
            print "{}: {}".format(cname, instances)

    if not sum([opt.method, opt.event, opt.obj]):
        for smpl in smpls:
            print "Sample: {}".format(smpl.name)
            print str(smpl)
Code example #43
    def tensorize_example(self, example, is_training):
        clusters = example["clusters"]

        gold_mentions = sorted(tuple(m) for m in util.flatten(clusters))
        gold_mention_map = {m: i for i, m in enumerate(gold_mentions)}
        cluster_ids = np.zeros(len(gold_mentions))
        for cluster_id, cluster in enumerate(clusters):
            for mention in cluster:
                cluster_ids[gold_mention_map[tuple(mention)]] = cluster_id + 1

        sentences = example["sentences"]
        num_words = sum(len(s) for s in sentences)
        speakers = example["speakers"]
        # assert num_words == len(speakers), (num_words, len(speakers))
        speaker_dict = self.get_speaker_dict(util.flatten(speakers))
        sentence_map = example['sentence_map']

        max_sentence_length = self.max_segment_len
        text_len = np.array([len(s) for s in sentences])

        input_ids, input_mask, speaker_ids = [], [], []
        for i, (sentence, speaker) in enumerate(zip(sentences, speakers)):
            sent_input_ids = self.tokenizer.convert_tokens_to_ids(sentence)
            sent_input_mask = [1] * len(sent_input_ids)
            sent_speaker_ids = [speaker_dict.get(s, 3) for s in speaker]
            while len(sent_input_ids) < max_sentence_length:
                sent_input_ids.append(0)
                sent_input_mask.append(0)
                sent_speaker_ids.append(0)
            input_ids.append(sent_input_ids)
            speaker_ids.append(sent_speaker_ids)
            input_mask.append(sent_input_mask)
        input_ids = np.array(input_ids)
        input_mask = np.array(input_mask)
        speaker_ids = np.array(speaker_ids)
        assert num_words == np.sum(input_mask), (num_words, np.sum(input_mask))

        doc_key = example["doc_key"]
        self.subtoken_maps[doc_key] = example.get("subtoken_map", None)
        self.gold[doc_key] = example["clusters"]
        genre = self.genres.get(doc_key[:2], 0)

        gold_starts, gold_ends = self.tensorize_mentions(gold_mentions)
        example_tensors = (input_ids, input_mask, text_len, speaker_ids, genre,
                           is_training, gold_starts, gold_ends, cluster_ids,
                           sentence_map)

        if is_training and len(
                sentences) > self.config["max_training_sentences"]:
            if self.config['single_example']:
                return self.truncate_example(*example_tensors)
            else:
                offsets = range(self.config['max_training_sentences'],
                                len(sentences),
                                self.config['max_training_sentences'])
                tensor_list = [
                    self.truncate_example(*(example_tensors + (offset, )))
                    for offset in offsets
                ]
                return tensor_list
        else:
            return example_tensors
Code example #44
 def evt_sources(self):
     srcss = map(op.attrgetter("sources"), self.evts)
     return util.rm_dup(util.flatten(srcss))
Code example #45
    def learn(self, transitions, rewards):

        # Prepare for learning
        self.updateDicts()
        attributes = ["X_pos", "Y_pos", "X_size", "Y_size", "Colour", "Shape", "Nothing", "Reward"]
        model_updated = False
        if transitions and rewards:
            att_list = range(REWARD + 1)
        elif transitions and not rewards:
            att_list = range(REWARD)
        elif not transitions and rewards:
            att_list = [REWARD]
        else:
            return

        # For each object attribute or reward
        for i in att_list:

            # print("**************************")
            # print("Learning schemas for " + attributes[i])
            # print("**************************")


            remaining = {key: [] for key in self.data[i].keys()}

            # For each attribute/reward value to be predicted
            for key in self.data[i].keys():

                # If the maximum number of schemas has already been learnt, we skip this round of learning
                if len(self.schemas[i][key]) >= LIMIT:
                    remaining[key] = self.data[i][key]
                    continue

                # If we are predicting rewards, the learning data is constructed from all objects that have changed
                if i == REWARD:

                    # Form positive cases
                    xYes = []
                    xNo = []
                    for datum in self.data[i][key]:
                        predicted = False
                        for o in datum.keys():
                            if self.checkDatum([datum[o], key], i, consistency_check=True):
                                predicted = True
                                # self.evidence[i][key].append(datum)
                                break
                        if not predicted:
                            xYes += [datum[c] for c in self.obsChanges]
                            xNo += [datum[o] for o in datum.keys() if o not in self.obsChanges]

                            # if not self.checkDatum([datum[o], key], i)[0]:
                            #     xYes += [datum[c] for c in self.obsChanges]
                            #     xNo += [datum[o] for o in datum.keys() if o not in self.obsChanges]

                    # Form negative cases
                    for other in self.data[i].keys():
                        if other != key:
                            xNo += util.flatten([[datum[o] for o in datum.keys()] for datum in self.data[i][other] + self.evidence[i][other]])

                # Otherwise we construct learning data in the standard way
                else:

                    # Form positive cases
                    xYes = []
                    for datum in self.data[i][key]:
                        if datum[0][i] != key:
                            # if self.checkDatum([datum,key], i)[0]:
                            #     self.evidence[i][key].append(datum)
                            # else:
                            #     xYes.append(datum)
                            if not self.checkDatum([datum,key], i, consistency_check=True):
                                xYes.append(datum)

                    self.data[i][key] = [datum for datum in self.data[i][key] if datum not in self.evidence[i][key]]

                    # Form negative cases
                    xNo = [self.data[i][other] + self.evidence[i][other] for other in self.data[i].keys() if other != key]
                    xNo = util.flatten(xNo)

                # If there are no changes in this attribute of the primary object then we skip this round of learning
                if len(xYes) == 0:
                    remaining[key] = self.data[i][key]
                    # print("no changes for " + str(key))
                    continue

                # Form binary vectors for learning
                xYes = [util.toBinary(self, item) for item in xYes]
                xNo = [util.toBinary(self, item) for item in xNo]
                schemas = [util.toBinarySchema(self, schema) for schema in self.schemas[i][key]]
                oldSchemas = deepcopy(schemas)

                # print("Learning for " + str(key))

                # Learn and output schemas, new evidence, and remaining positive cases
                if i == REWARD:
                    [binarySchemas, _, _] = lern.learnSchemas(xYes, xNo, schemas, self.deterministic)
                else:
                    [binarySchemas, binaryEvidence, binaryRemaining] = lern.learnSchemas(xYes, xNo, schemas, self.deterministic)

                # print("111111111111111111")
                # print schemas
                # print("222222222222222222")
                # print binarySchemas
                # print("333333333333333333")

                # Name new schemas
                new_names = []
                new_schemas = [util.fromBinarySchema(self, s, key) for s in binarySchemas if s not in oldSchemas]
                for s in new_schemas:
                    s.name = self.num_schemas
                    new_names.append(s.name)
                    self.num_schemas += 1

                # Convert learnt schemas and evidence from binary output and add to model
                self.schemas[i][key] += new_schemas
                self.schemas[i][key] = util.simplify(self, self.schemas[i][key], key, attributes[i])

                # Get initial counts for the new schemas and display them
                new_printed = False
                for s in self.schemas[i][key]:
                    if s.name in new_names:
                        if not new_printed:
                            print("New schemas: ")
                            new_printed = True
                            model_updated = True
                        if not self.deterministic:
                            s.get_initial_counts(self, i)
                        print(attributes[i] + " = " + str(key) + " <- " + s.display(no_head=True))


            #     # If they are reward schemas then the binary evidence and remaining data are not in the correct form to be stored
            #     if i == REWARD:
            #         for datum in self.data[i][key]:
            #             predicted = False
            #             for o in datum.keys():
            #                 if self.checkDatum([datum[o], key], i)[0]:
            #                     predicted = True
            #                     self.evidence[i][key].append(datum)
            #                     break
            #             if not predicted:
            #                 remaining[key].append(datum)
            #
            #     # Otherwise we can convert directly back from the binary data and store the result
            #     else:
            #         self.evidence[i][key] += [util.fromBinary(self, datum) for datum in binaryEvidence]
            #         remaining[key] = [util.fromBinary(self, datum) for datum in binaryRemaining]
            #
            # self.data[i] = remaining

        return model_updated
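
In both branches of learn() above, util.flatten only collapses one level of nesting when pooling the negative training cases drawn from the other attribute values. A toy, project-independent illustration (the per-object state data are simplified to short strings here):

flatten = lambda xss: [x for xs in xss for x in xs]

data = {"red": ["case1", "case2"], "blue": ["case3"], "green": ["case4"]}
key = "red"
# Negative cases: everything observed for the *other* values, as one flat list.
xNo = flatten([data[other] for other in data if other != key])
print(xNo)   # ['case3', 'case4']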
コード例 #46
0
ファイル: predict.py プロジェクト: ishine/fanfiction-nlp
        docs = [json.loads(line) for line in lines]
        tensor_examples, stored_info = data_processor.get_tensor_examples_from_custom_input(docs)
        predicted_clusters, _, _ = runner.predict(model, tensor_examples)

        if args.output_path:
            with open(args.output_path, 'w') as f:
                for i, doc in enumerate(docs):
                    doc['predicted_clusters'] = predicted_clusters[i]
                    f.write(json.dumps(doc) + "\n")
            #print(f'Saved prediction in {args.output_path}')
    else:
        # Interactive input
        model.to(model.device)
        nlp = English()
        nlp.add_pipe(nlp.create_pipe('sentencizer'))
        while True:
            input_str = str(input('Input document:'))
            bert_tokenizer, spacy_tokenizer = data_processor.tokenizer, nlp
            doc = get_document_from_string(input_str, args.seg_len, bert_tokenizer, nlp)
            tensor_examples, stored_info = data_processor.get_tensor_examples_from_custom_input([doc])
            predicted_clusters, _, _ = runner.predict(model, tensor_examples)

            subtokens = util.flatten(doc['sentences'])
            #print('---Predicted clusters:')
            for cluster in predicted_clusters[0]:
                mentions_str = [' '.join(subtokens[m[0]:m[1]+1]) for m in cluster]
                mentions_str = [m.replace(' ##', '') for m in mentions_str]
                mentions_str = [m.replace('##', '') for m in mentions_str]
                #print(mentions_str)  # Print out strings
                # print(cluster)  # Print out indices
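
The mention strings above are rebuilt from BERT WordPieces after flattening the segmented document; a standalone illustration of that join-and-cleanup step (toy subtokens, not actual model output):

subtokens = ['Mary', 'pre', '##dict', '##ed', 'rain']
mention = (1, 3)   # inclusive (start, end) indices into the flattened subtokens
text = ' '.join(subtokens[mention[0]:mention[1] + 1])
text = text.replace(' ##', '').replace('##', '')
print(text)        # 'predicted'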
コード例 #47
0
 else:
     model.restore(sess)
 for j, line in enumerate(lines[1:]):
     parts = line.split('\t')
     example_id = parts[0].strip()
     text = parts[1].strip()
     doc = nlp(unicode(text))
     sentences = [[unicode(str(w)) for w in sent] for sent in doc.sents]
     example = {
         'sentences': sentences,
         'doc_key': 'nw',
         'speakers': [['' for _ in sent] for sent in doc.sents],
         'clusters': []
     }
     result = make_predictions(text, model, sess, example)
     words = util.flatten(result['sentences'])
     c = 0
     nameA = parts[4].strip()
     nameA_offset = int(parts[5].strip())
     nameB = parts[7].strip()
     nameB_offset = int(parts[8].strip())
     pronoun_char_offset = int(parts[3].strip())
     pronoun_index = None
     nameA_index = None
     nameB_index = None
     for k, token in enumerate(doc):
         if token.idx == pronoun_char_offset or (
                 pronoun_index is None and k + 1 < len(doc)
                 and doc[k + 1].idx > pronoun_char_offset):
             pronoun_index = token.i
         elif token.idx == nameA_offset or (
コード例 #48
0
def detip():
    commands = [lift(), goto('tipstop'), lift(109), lift()]
    return flatten(commands)
コード例 #49
0
def push():
    commands = ['G1 E45 F3600']
    return flatten(commands)
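
detip() and push() above mix bare G-code strings with lists returned by helpers such as lift(), so the command list is unevenly nested; flatten() presumably normalizes both shapes. A self-contained sketch of that presumed behaviour (goto() is a hypothetical stand-in, as it is not shown in these snippets):

def lift(height=98):
    return ['G1 Z{} F3600'.format(height)]

def goto(name):
    # Hypothetical stand-in for the project's goto() helper.
    return ['; goto {}'.format(name)]

def flatten(commands):
    # Presumed behaviour: accept a mix of strings and (nested) lists of strings.
    out = []
    for c in commands:
        if isinstance(c, str):
            out.append(c)
        else:
            out.extend(flatten(c))
    return out

print(flatten([lift(), goto('tipstop'), lift(109), lift()]))
# ['G1 Z98 F3600', '; goto tipstop', 'G1 Z109 F3600', 'G1 Z98 F3600']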
コード例 #50
0
def flam3_to_node(flame):
    n = util.unflatten(util.flatten(apply_structure(flame_structure, flame)))
    n['type'] = 'node'
    return n
コード例 #51
0
 def print_predictions(self, example):
     words = util.flatten(example["sentences"])
     for cluster in example["predicted_clusters"]:
         print(u"Predicted cluster: {}".format(
             [" ".join(words[m[0]:m[1] + 1]) for m in cluster]))
コード例 #52
0
def main():
    np.random.seed(0)
    tf.set_random_seed(0)

    dtype = np.float32
    # 64-bit doesn't help much, search for 64-bit in
    # https://www.wolframcloud.com/objects/5f297f41-30f7-4b1b-972c-cac8d1f8d8e4
    u.default_dtype = dtype
    machine_epsilon = np.finfo(dtype).eps  # 1e-7 or 1e-16
    train_images = load_MNIST.load_MNIST_images('data/train-images-idx3-ubyte')
    dsize = 10000
    patches = train_images[:, :dsize]
    fs = [dsize, 28 * 28, 196, 28 * 28]

    # values from deeplearning.stanford.edu/wiki/index.php/UFLDL_Tutorial
    X0 = patches
    lambda_ = 3e-3
    rho = tf.constant(0.1, dtype=dtype)
    beta = 3
    W0f = W_uniform(fs[2], fs[3])

    def f(i):
        return fs[i + 1]  # W[i] has shape f[i] x f[i-1]

    dsize = f(-1)
    n = len(fs) - 2

    # helper to create variables with numpy or TF initial value
    init_dict = {}  # {var_placeholder: init_value}
    vard = {}  # {var: util.VarInfo}

    def init_var(val, name, trainable=False, noinit=False):
        if isinstance(val, tf.Tensor):
            collections = [] if noinit else None
            var = tf.Variable(val, name=name, collections=collections)
        else:
            val = np.array(val)
            assert u.is_numeric(val), "Unknown type"
            holder = tf.placeholder(dtype,
                                    shape=val.shape,
                                    name=name + "_holder")
            var = tf.Variable(holder, name=name, trainable=trainable)
            init_dict[holder] = val
        var_p = tf.placeholder(var.dtype, var.shape)
        var_setter = var.assign(var_p)
        vard[var] = u.VarInfo(var_setter, var_p)
        return var

    lr = init_var(0.2, "lr")
    if purely_linear:  # need lower LR without sigmoids
        lr = init_var(.02, "lr")

    Wf = init_var(W0f, "Wf", True)
    Wf_copy = init_var(W0f, "Wf_copy", True)
    W = u.unflatten(Wf, fs[1:])  # perftodo: this creates transposes
    X = init_var(X0, "X")
    W.insert(0, X)

    def sigmoid(x):
        if not purely_linear:
            return tf.sigmoid(x)
        else:
            return tf.identity(x)

    def d_sigmoid(y):
        if not purely_linear:
            return y * (1 - y)
        else:
            return 1

    def kl(x, y):
        return x * tf.log(x / y) + (1 - x) * tf.log((1 - x) / (1 - y))

    def d_kl(x, y):
        return (1 - x) / (1 - y) - x / y

    # A[i] = activations needed to compute gradient of W[i]
    # A[n+1] = network output
    A = [None] * (n + 2)

    # A[0] is just for shape checks, assert fail on run
    # tf.assert always fails because of static assert
    # fail_node = tf.assert_equal(1, 0, message="too huge")
    fail_node = tf.Print(0, [0], "fail, this must never run")
    with tf.control_dependencies([fail_node]):
        A[0] = u.Identity(dsize, dtype=dtype)
    A[1] = W[0]
    for i in range(1, n + 1):
        A[i + 1] = sigmoid(W[i] @ A[i])

    # reconstruction error and sparsity error
    err = (A[3] - A[1])
    rho_hat = tf.reduce_sum(A[2], axis=1, keep_dims=True) / dsize

    # B[i] = backprops needed to compute gradient of W[i]
    # B2[i] = backprops from sampled labels needed for natural gradient
    B = [None] * (n + 1)
    B2 = [None] * (n + 1)
    B[n] = err * d_sigmoid(A[n + 1])
    sampled_labels_live = tf.random_normal((f(n), f(-1)), dtype=dtype, seed=0)
    sampled_labels = init_var(sampled_labels_live,
                              "sampled_labels",
                              noinit=True)
    B2[n] = sampled_labels * d_sigmoid(A[n + 1])
    for i in range(n - 1, -1, -1):
        backprop = t(W[i + 1]) @ B[i + 1]
        backprop2 = t(W[i + 1]) @ B2[i + 1]
        if i == 1 and not drop_sparsity:
            backprop += beta * d_kl(rho, rho_hat)
            backprop2 += beta * d_kl(rho, rho_hat)
        B[i] = backprop * d_sigmoid(A[i + 1])
        B2[i] = backprop2 * d_sigmoid(A[i + 1])

    # dW[i] = gradient of W[i]
    dW = [None] * (n + 1)
    pre_dW = [None] * (n + 1)  # preconditioned dW
    pre_dW_stable = [None] * (n + 1)  # preconditioned stable dW

    cov_A = [None] * (n + 1)  # covariance of activations[i]
    cov_B2 = [None] * (n + 1)  # covariance of synthetic backprops[i]
    vars_svd_A = [None] * (n + 1)
    vars_svd_B2 = [None] * (n + 1)
    for i in range(1, n + 1):
        cov_A[i] = init_var(A[i] @ t(A[i]) / dsize, "cov_A%d" % (i, ))
        cov_B2[i] = init_var(B2[i] @ t(B2[i]) / dsize, "cov_B2%d" % (i, ))
        vars_svd_A[i] = u.SvdWrapper(cov_A[i], "svd_A_%d" % (i, ))
        vars_svd_B2[i] = u.SvdWrapper(cov_B2[i], "svd_B2_%d" % (i, ))
        if use_tikhonov:
            whitened_A = u.regularized_inverse2(vars_svd_A[i], L=Lambda) @ A[i]
        else:
            whitened_A = u.pseudo_inverse2(vars_svd_A[i]) @ A[i]
        if use_tikhonov:
            whitened_B2 = u.regularized_inverse2(vars_svd_B2[i],
                                                 L=Lambda) @ B[i]
        else:
            whitened_B2 = u.pseudo_inverse2(vars_svd_B2[i]) @ B[i]
        whitened_A_stable = u.pseudo_inverse_sqrt2(vars_svd_A[i]) @ A[i]
        whitened_B2_stable = u.pseudo_inverse_sqrt2(vars_svd_B2[i]) @ B[i]
        pre_dW[i] = (whitened_B2 @ t(whitened_A)) / dsize
        pre_dW_stable[i] = (whitened_B2_stable @ t(whitened_A_stable)) / dsize
        dW[i] = (B[i] @ t(A[i])) / dsize

    # Loss function
    reconstruction = u.L2(err) / (2 * dsize)
    sparsity = beta * tf.reduce_sum(kl(rho, rho_hat))
    L2 = (lambda_ / 2) * (u.L2(W[1]) + u.L2(W[1]))

    loss = reconstruction
    if not drop_l2:
        loss = loss + L2
    if not drop_sparsity:
        loss = loss + sparsity

    grad_live = u.flatten(dW[1:])
    pre_grad_live = u.flatten(pre_dW[1:])  # fisher preconditioned gradient
    pre_grad_stable_live = u.flatten(
        pre_dW_stable[1:])  # sqrt fisher preconditioned grad
    grad = init_var(grad_live, "grad")
    pre_grad = init_var(pre_grad_live, "pre_grad")
    pre_grad_stable = init_var(pre_grad_stable_live, "pre_grad_stable")

    update_params_op = Wf.assign(Wf - lr * pre_grad).op
    update_params_stable_op = Wf.assign(Wf - lr * pre_grad_stable).op
    save_params_op = Wf_copy.assign(Wf).op
    pre_grad_dot_grad = tf.reduce_sum(pre_grad * grad)
    pre_grad_stable_dot_grad = tf.reduce_sum(pre_grad_stable * grad)
    grad_norm = tf.reduce_sum(grad * grad)
    pre_grad_norm = u.L2(pre_grad)
    pre_grad_stable_norm = u.L2(pre_grad_stable)

    def dump_svd_info(step):
        """Dump singular values and gradient values in those coordinates."""
        for i in range(1, n + 1):
            svd = vars_svd_A[i]
            s0, u0, v0 = sess.run([svd.s, svd.u, svd.v])
            util.dump(s0, "A_%d_%d" % (i, step))
            A0 = A[i].eval()
            At0 = v0.T @ A0
            util.dump(A0 @ A0.T, "Acov_%d_%d" % (i, step))
            util.dump(At0 @ At0.T, "Atcov_%d_%d" % (i, step))
            util.dump(s0, "As_%d_%d" % (i, step))

        for i in range(1, n + 1):
            svd = vars_svd_B2[i]
            s0, u0, v0 = sess.run([svd.s, svd.u, svd.v])
            util.dump(s0, "B2_%d_%d" % (i, step))
            B0 = B[i].eval()
            Bt0 = v0.T @ B0
            util.dump(B0 @ B0.T, "Bcov_%d_%d" % (i, step))
            util.dump(Bt0 @ Bt0.T, "Btcov_%d_%d" % (i, step))
            util.dump(s0, "Bs_%d_%d" % (i, step))

    def advance_batch():
        sess.run(sampled_labels.initializer)  # new labels for next call

    def update_covariances():
        ops_A = [cov_A[i].initializer for i in range(1, n + 1)]
        ops_B2 = [cov_B2[i].initializer for i in range(1, n + 1)]
        sess.run(ops_A + ops_B2)

    def update_svds():
        if whitening_mode > 1:
            vars_svd_A[2].update()
        if whitening_mode > 2:
            vars_svd_B2[2].update()
        if whitening_mode > 3:
            vars_svd_B2[1].update()

    def init_svds():
        """Initialize our SVD to identity matrices."""
        ops = []
        for i in range(1, n + 1):
            ops.extend(vars_svd_A[i].init_ops)
            ops.extend(vars_svd_B2[i].init_ops)
        sess = tf.get_default_session()
        sess.run(ops)

    init_op = tf.global_variables_initializer()
    #  tf.get_default_graph().finalize()

    from tensorflow.core.protobuf import rewriter_config_pb2

    rewrite_options = rewriter_config_pb2.RewriterConfig(
        disable_model_pruning=True,
        constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
        memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL)
    optimizer_options = tf.OptimizerOptions(opt_level=tf.OptimizerOptions.L0)
    graph_options = tf.GraphOptions(optimizer_options=optimizer_options,
                                    rewrite_options=rewrite_options)
    config = tf.ConfigProto(graph_options=graph_options)
    #sess = tf.Session(config=config)
    sess = tf.InteractiveSession(config=config)
    sess.run(Wf.initializer, feed_dict=init_dict)
    sess.run(X.initializer, feed_dict=init_dict)
    advance_batch()
    update_covariances()
    init_svds()
    sess.run(init_op, feed_dict=init_dict)  # initialize everything else

    print("Running training.")
    u.reset_time()

    step_lengths = []  # keep track of learning rates
    losses = []
    ratios = []  # actual loss decrease / expected decrease
    grad_norms = []
    pre_grad_norms = []  # preconditioned grad norm squared
    pre_grad_stable_norms = []  # sqrt preconditioned grad norms squared
    target_delta_list = []  # predicted decrease, linear approximation
    target_delta2_list = []  # predicted decrease, quadratic approximation
    actual_delta_list = []  # actual decrease

    # adaptive line search parameters
    alpha = 0.3  # acceptable fraction of predicted decrease
    beta = 0.8  # how much to shrink when violation
    growth_rate = 1.05  # how much to grow when too conservative

    def update_cov_A(i):
        sess.run(cov_A[i].initializer)

    def update_cov_B2(i):
        sess.run(cov_B2[i].initializer)

    # only update whitening matrix of input activations in the beginning
    if whitening_mode > 0:
        vars_svd_A[1].update()

    # compute t(delta).H.delta/2
    def hessian_quadratic(delta):
        #    update_covariances()
        W = u.unflatten(delta, fs[1:])
        W.insert(0, None)
        total = 0
        for l in range(1, n + 1):
            decrement = tf.trace(t(W[l]) @ cov_B2[l] @ W[l] @ cov_A[l])
            total += decrement
        return (total / 2).eval()

    # compute t(delta).H^-1.delta/2
    def hessian_quadratic_inv(delta):
        #    update_covariances()
        W = u.unflatten(delta, fs[1:])
        W.insert(0, None)
        total = 0
        for l in range(1, n + 1):
            invB2 = u.pseudo_inverse2(vars_svd_B2[l])
            invA = u.pseudo_inverse2(vars_svd_A[l])
            decrement = tf.trace(t(W[l]) @ invB2 @ W[l] @ invA)
            total += decrement
        return (total / 2).eval()

    # do line search, dump values as csv
    def line_search(initial_value, direction, step, num_steps):
        saved_val = tf.Variable(Wf)
        sess.run(saved_val.initializer)
        pl = tf.placeholder(dtype, shape=(), name="linesearch_p")
        assign_op = Wf.assign(initial_value - direction * step * pl)
        vals = []
        for i in range(num_steps):
            sess.run(assign_op, feed_dict={pl: i})
            vals.append(loss.eval())
        sess.run(Wf.assign(saved_val))  # restore original value
        return vals

    for step in range(num_steps):
        update_covariances()
        if step % whiten_every_n_steps == 0:
            update_svds()

        sess.run(grad.initializer)
        sess.run(pre_grad.initializer)

        lr0, loss0 = sess.run([lr, loss])
        save_params_op.run()

        # regular inverse becomes unstable when grad norm exceeds 1
        stabilized_mode = grad_norm.eval() < 1

        if stabilized_mode and not use_tikhonov:
            update_params_stable_op.run()
        else:
            update_params_op.run()

        loss1 = loss.eval()
        advance_batch()

        # line search stuff
        target_slope = (-pre_grad_dot_grad.eval() if stabilized_mode else
                        -pre_grad_stable_dot_grad.eval())
        target_delta = lr0 * target_slope
        target_delta_list.append(target_delta)

        # second order prediction of target delta
        # TODO: the sign is wrong, debug this
        # https://www.wolframcloud.com/objects/8f287f2f-ceb7-42f7-a599-1c03fda18f28
        if local_quadratics:
            x0 = Wf_copy.eval()
            x_opt = x0 - pre_grad.eval()
            # computes t(x)@H^-1 @(x)/2
            y_opt = loss0 - hessian_quadratic_inv(grad)
            # computes t(x)@H @(x)/2
            y_expected = hessian_quadratic(Wf - x_opt) + y_opt
            target_delta2 = y_expected - loss0
            target_delta2_list.append(target_delta2)

        actual_delta = loss1 - loss0
        actual_slope = actual_delta / lr0
        slope_ratio = actual_slope / target_slope  # between 0 and 1.01
        actual_delta_list.append(actual_delta)

        if do_line_search:
            vals1 = line_search(Wf_copy, pre_grad, lr / 100, 40)
            vals2 = line_search(Wf_copy, grad, lr / 100, 40)
            u.dump(vals1, "line1-%d" % (step, ))
            u.dump(vals2, "line2-%d" % (step, ))

        losses.append(loss0)
        step_lengths.append(lr0)
        ratios.append(slope_ratio)
        grad_norms.append(grad_norm.eval())
        pre_grad_norms.append(pre_grad_norm.eval())
        pre_grad_stable_norms.append(pre_grad_stable_norm.eval())

        if step % report_frequency == 0:
            print(
                "Step %d loss %.2f, target decrease %.3f, actual decrease, %.3f ratio %.2f grad norm: %.2f pregrad norm: %.2f"
                % (step, loss0, target_delta, actual_delta, slope_ratio,
                   grad_norm.eval(), pre_grad_norm.eval()))

        if adaptive_step_frequency and adaptive_step and step > adaptive_step_burn_in:
            # shrink if wrong prediction, don't shrink if prediction is tiny
            if slope_ratio < alpha and abs(
                    target_delta) > 1e-6 and adaptive_step:
                print("%.2f %.2f %.2f" % (loss0, loss1, slope_ratio))
                print(
                    "Slope optimality %.2f, shrinking learning rate to %.2f" %
                    (
                        slope_ratio,
                        lr0 * beta,
                    ))
                sess.run(vard[lr].setter, feed_dict={vard[lr].p: lr0 * beta})

            # grow learning rate, slope_ratio .99 worked best for gradient
            elif step > 0 and step % 50 == 0 and slope_ratio > 0.90 and adaptive_step:
                print("%.2f %.2f %.2f" % (loss0, loss1, slope_ratio))
                print("Growing learning rate to %.2f" % (lr0 * growth_rate))
                sess.run(vard[lr].setter,
                         feed_dict={vard[lr].p: lr0 * growth_rate})

        u.record_time()

    # check against expected loss
    if 'Apple' in sys.version:
        pass
        #    u.dump(losses, "kfac_small_final_mac.csv")
        targets = np.loadtxt("data/kfac_small_final_mac.csv", delimiter=",")
    else:
        pass
        #    u.dump(losses, "kfac_small_final_linux.csv")
        targets = np.loadtxt("data/kfac_small_final_linux.csv", delimiter=",")

    u.check_equal(targets, losses[:len(targets)], rtol=1e-1)
    u.summarize_time()
    print("Test passed")
コード例 #53
0
    pre_dW[i] = (whitened_B2 @ t(whitened_A))/dsize
    pre_dW_stable[i] = (whitened_B2_stable @ t(whitened_A_stable))/dsize
    dW[i] = (B[i] @ t(A[i]))/dsize

  # Loss function
  reconstruction = u.L2(err) / (2 * dsize)
  sparsity = beta * tf.reduce_sum(kl(rho, rho_hat))
  L2 = (lambda_ / 2) * (u.L2(W[1]) + u.L2(W[1]))

  loss = reconstruction
  if not drop_l2:
    loss = loss + L2
  if not drop_sparsity:
    loss = loss + sparsity

  grad_live = u.flatten(dW[1:])
  pre_grad_live = u.flatten(pre_dW[1:]) # fisher preconditioned gradient
  pre_grad_stable_live = u.flatten(pre_dW_stable[1:]) # sqrt fisher preconditioned grad
  grad = init_var(grad_live, "grad")
  pre_grad = init_var(pre_grad_live, "pre_grad")
  pre_grad_stable = init_var(pre_grad_stable_live, "pre_grad_stable")

  update_params_op = Wf.assign(Wf-lr*pre_grad).op
  update_params_stable_op = Wf.assign(Wf-lr*pre_grad_stable).op
  save_params_op = Wf_copy.assign(Wf).op
  pre_grad_dot_grad = tf.reduce_sum(pre_grad*grad)
  pre_grad_stable_dot_grad = tf.reduce_sum(pre_grad_stable*grad)
  grad_norm = tf.reduce_sum(grad*grad)
  pre_grad_norm = u.L2(pre_grad)
  pre_grad_stable_norm = u.L2(pre_grad_stable)
コード例 #54
0
def map_smpls(smpls, f):
    return util.flatten(map(f, smpls))
コード例 #55
0
def lift(height=98):
    commands = ['G1 Z{} F3600'.format(height)]
    return flatten(commands)
コード例 #56
0
def intFloor(*args):
    return [int(math.floor(x)) for x in flatten(args)]
コード例 #57
0
def model_fn(features, labels, mode, params):
  training = mode == tf.estimator.ModeKeys.TRAIN
  x = tf.reshape(features, [-1, 125, 161, 2], name='cnn6')
  x_norm = tf.layers.batch_normalization(x, training=training, name='x_norm')
  x = tf.reshape(x_norm[:, :, :, 0], [-1, 125, 161, 1], name='reshape_spec')

  if params['verbose_summary']:
    tf.summary.image('input', x)

  conv = x
  conv = tf.layers.conv2d(conv, filters=16, kernel_size=5, activation=tf.nn.relu, name='conv1')
  pool = tf.layers.max_pooling2d(conv, pool_size=[2, 2], strides=2, name='pool1')
  if params['verbose_summary']:
    log_conv_kernel('conv1')
    tf.summary.image('pool1', pool[:, :, :, 0:1])

  conv = tf.layers.conv2d(pool, filters=32, kernel_size=5, activation=tf.nn.relu, name='conv2')
  pool = tf.layers.max_pooling2d(conv, pool_size=[2, 2], strides=2, name='pool2')
  if params['verbose_summary']:
    log_conv_kernel('conv2')
    tf.summary.image('pool2', pool[:, :, :, 0:1])

  conv = tf.layers.conv2d(pool, filters=64, kernel_size=5, activation=tf.nn.relu, name='conv3')
  pool = tf.layers.max_pooling2d(conv, pool_size=[2, 2], strides=2, name='pool3')
  if params['verbose_summary']:
    log_conv_kernel('conv3')
    tf.summary.image('pool3', pool[:, :, :, 0:1])

  conv = tf.layers.conv2d(pool, filters=128, kernel_size=5, activation=tf.nn.relu, name='conv4')
  pool = tf.layers.max_pooling2d(conv, pool_size=[2, 2], strides=2, name='pool4')
  if params['verbose_summary']:
    log_conv_kernel('conv4')
    tf.summary.image('pool4', pool[:, :, :, 0:1])

  flat = flatten(pool)

  dropout4 = tf.layers.dropout(flat, rate=params['dropout_rate'], training=training, name='dropout4')
  dense4 = tf.layers.dense(dropout4, units=128, activation=tf.nn.relu, name='dense4')

  logits = tf.layers.dense(dense4, units=params['num_classes'], name='logits')

  predictions = {
    'classes': tf.argmax(logits, axis=1, name='prediction_classes'),
    'probabilities': tf.nn.softmax(logits, name='prediction_probabilities'),
  }

  if mode == ModeKeys.PREDICT:
    return EstimatorSpec(mode=mode, predictions={'predictions': predictions['probabilities']})

  tf.summary.image('confusion_matrix', conf_mat(labels, predictions['classes'], params['num_classes']))

  onehot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=params['num_classes'], name='onehot_labels')
  loss = tf.losses.softmax_cross_entropy(onehot_labels=onehot_labels, logits=logits)
  tf.summary.scalar('loss', loss)

  optimizer = tf.train.GradientDescentOptimizer(learning_rate=params['learning_rate'])
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
  eval_metric_ops = {
    'accuracy': tf.metrics.accuracy(labels=labels, predictions=predictions['classes'])
  }

  tf.summary.scalar('accuracy', eval_metric_ops['accuracy'][1])

  return EstimatorSpec(
    mode=mode,
    loss=loss,
    train_op=train_op,
    eval_metric_ops=eval_metric_ops
  )
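
Unlike the list-flattening helpers elsewhere on this page, flatten(pool) here has to turn the 4-D pooled feature map into a 2-D [batch, features] tensor before the dense layer. flatten() is not defined in this snippet; a plausible TF 1.x implementation (an assumption, roughly equivalent to tf.layers.flatten) would be:

import tensorflow as tf  # TF 1.x, matching the tf.layers API used above

def flatten(x):
    # Collapse all non-batch dimensions into a single feature dimension.
    dims = x.get_shape().as_list()[1:]
    size = 1
    for d in dims:
        size *= d
    return tf.reshape(x, [-1, size])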
コード例 #58
0
        whitened_B2 = u.pseudo_inverse_sqrt2(vars_svd_B2[i]) @ B[i]
        pre_dW[i] = (whitened_B2 @ t(whitened_A)) / dsize
        dW[i] = (B[i] @ t(A[i])) / dsize

    # Cost function
    reconstruction = u.L2(err) / (2 * dsize)
    sparsity = beta * tf.reduce_sum(kl(rho, rho_hat))
    L2 = (lambda_ / 2) * (u.L2(W[1]) + u.L2(W[1]))

    cost = reconstruction
    if not drop_l2:
        cost = cost + L2
    if not drop_sparsity:
        cost = cost + sparsity

    grad_live = u.flatten(dW[1:])
    pre_grad_live = u.flatten(pre_dW[1:])  # preconditioned gradient
    grad = init_var(grad_live, "grad")
    pre_grad = init_var(pre_grad_live, "pre_grad")

    update_params_op = Wf.assign(Wf - lr * pre_grad).op
    save_params_op = Wf_copy.assign(Wf).op
    pre_grad_dot_grad = tf.reduce_sum(pre_grad * grad)

    def advance_batch():
        sess.run(sampled_labels.initializer)  # new labels for next call

    def update_covariances():
        ops_A = [cov_A[i].initializer for i in range(1, n + 1)]
        ops_B2 = [cov_B2[i].initializer for i in range(1, n + 1)]
        sess.run(ops_A + ops_B2)
コード例 #59
0
ファイル: LogList.py プロジェクト: AsFal/Epimetheus
 def getAllCategoryNames(self):
     return list(
         set(flatten([log.getAllCategoryNames() for log in self.list])))
コード例 #60
0
    def tensorize_example(self, example, is_training, oov_counts=None):
        clusters = example["clusters"]

        gold_mentions = sorted(tuple(m) for m in util.flatten(clusters))
        gold_mention_map = {m: i for i, m in enumerate(gold_mentions)}
        cluster_ids = np.zeros(len(gold_mentions))
        for cluster_id, cluster in enumerate(clusters):
            for mention in cluster:
                cluster_ids[gold_mention_map[tuple(mention)]] = cluster_id

        sentences = example["sentences"]
        num_words = sum(len(s) for s in sentences)
        speakers = util.flatten(example["speakers"])

        # add POS tag and NER
        pos_tags = example["pos_tags"]

        # if self.config["use_categories"]:
        categories = example["categories"]  # categories

        if self.config["use_ner_g"] or self.config["use_ner_phi"]:
            ner_tags = example["ner_tags"]
            ner_ids = np.array([
                self.ner_tag_dict[s[2:].replace('*', '')]
                if len(s) > 1 else self.ner_tag_dict[s]
                for s in util.flatten(ner_tags)
            ])

        else:
            ner_ids = np.array([])

        assert num_words == len(speakers)

        max_sentence_length = max(len(s) for s in sentences)
        max_word_length = max(max(max(len(w) for w in s) for s in sentences),
                              max(self.config["filter_widths"]))
        word_emb = np.zeros(
            [len(sentences), max_sentence_length, self.embedding_size])
        char_index = np.zeros(
            [len(sentences), max_sentence_length, max_word_length])
        text_len = np.array([len(s) for s in sentences])

        pos_tag_emb = np.zeros(
            [len(sentences), max_sentence_length,
             len(self.pos_tag_dict)])
        ner_tag_emb = np.zeros(
            [len(sentences), max_sentence_length,
             len(self.ner_tag_dict)])
        categories_emb = np.zeros(
            [len(sentences), max_sentence_length,
             len(self.categories_dict)])

        for i, sentence in enumerate(sentences):
            for j, word in enumerate(sentence):
                current_dim = 0

                # word embedding with glove
                # k is index, 0 or 1
                # d is embedding dict, turian or glove
                # s is size (either 300 or 50)
                # l is lowercase = true or false (usually false)
                for k, (d, (s, l)) in enumerate(
                        zip(self.embedding_dicts, self.embedding_info)):
                    # print "s", s
                    # print "k", k
                    if l:
                        current_word = word.lower()
                    else:
                        current_word = word
                    if oov_counts is not None and current_word not in d:
                        oov_counts[k] += 1
                    word_emb[i, j, current_dim:current_dim +
                             s] = util.normalize(d[current_word])
                    current_dim += s

                # character embedding
                char_index[i,
                           j, :len(word)] = [self.char_dict[c] for c in word]

                # one hot encoding
                pos_tag_emb[i, j, :] = np.zeros([len(self.pos_tag_dict)])
                one = self.pos_tag_dict.get(pos_tags[i][j], 0)
                pos_tag_emb[i, j, one] = 1

                if self.config["use_ner_g"]:
                    ner_tag_emb[i, j, :] = np.zeros([len(self.ner_tag_dict)])
                    one = self.ner_tag_dict.get(ner_tags[i][j], 0)
                    ner_tag_emb[i, j, one] = 1

                if self.config["use_categories"]:
                    categories_emb[i, j, :] = np.zeros(
                        [len(self.categories_dict)])
                    one = self.categories_dict.get(categories[i][j], 0)
                    categories_emb[i, j, one] = 1

        # print type(self.glove_embedding_dict)
        # print self.glove_embedding_dict
        cat_glove_emb = np.zeros(
            [len(sentences), max_sentence_length, self.glove_embedding_size])

        # print len(self.glove_embedding_dict)

        if self.config["use_categories_glove"]:
            for i, category in enumerate(categories):
                for j, cat in enumerate(category):
                    # current_dim = 0
                    # for d in self.glove_embedding_dict:
                    if cat != '-':
                        # print cat
                        cat_glove_emb[i, j, :] = util.normalize(
                            self.glove_embedding_dict[cat])
                        # current_dim += 300

        speaker_dict = {s: i for i, s in enumerate(set(speakers))}
        speaker_ids = np.array([speaker_dict[s] for s in speakers])

        doc_key = example["doc_key"]
        genre = self.genres[doc_key[:2]]

        gold_starts, gold_ends = self.tensorize_mentions(gold_mentions)

        if is_training and len(
                sentences) > self.config["max_training_sentences"]:
            return self.truncate_example(word_emb, char_index, text_len,
                                         speaker_ids, genre, is_training,
                                         gold_starts, gold_ends, cluster_ids,
                                         pos_tag_emb, ner_tag_emb,
                                         categories_emb, ner_ids,
                                         cat_glove_emb)
            # return self.truncate_example(word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, pos_tag_emb, ner_tag_emb, ner_ids)
        else:
            return word_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids, pos_tag_emb, ner_tag_emb, categories_emb, ner_ids, cat_glove_emb
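
The pos_tag_emb / ner_tag_emb / categories_emb features built above are plain one-hot rows indexed through a tag dictionary; a toy, self-contained version of that encoding (tag names made up for illustration):

import numpy as np

pos_tag_dict = {"NN": 0, "VB": 1, "DT": 2}      # toy tag vocabulary
pos_tags = [["DT", "NN", "VB"]]                 # one sentence of toy tags
pos_tag_emb = np.zeros([len(pos_tags), 3, len(pos_tag_dict)])
for i, sent in enumerate(pos_tags):
    for j, tag in enumerate(sent):
        pos_tag_emb[i, j, pos_tag_dict.get(tag, 0)] = 1
print(pos_tag_emb[0])
# [[0. 0. 1.]
#  [1. 0. 0.]
#  [0. 1. 0.]]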