Example #1
0
 def init_data(self, param):
     """Fill in ``param.data`` (and ``param.num``) if it has not been set yet.

     Nothing happens when ``param.data`` is already set or when ``param`` is a
     ``NumericFeatureParameters`` instance.

     Otherwise ``param.num`` is taken from the feature extractor, and then:

     * if ``param.dim`` is a number, ``param.data`` becomes a ``defaultdict``
       whose missing entries are drawn from ``Config().random.normal`` with
       ``size=param.dim``; the ``UnknownDict.UNKNOWN`` entry is created eagerly;
     * otherwise ``param.dim`` is handed to ``load_word2vec``; ``param.dim`` is
       replaced by the loaded model's ``vector_size`` and ``param.data`` becomes
       an ``UnknownDict`` holding one vector per vocabulary entry plus a
       randomly drawn vector for unknown values.
     """
     already_initialized = param.data is not None
     if already_initialized or isinstance(param, NumericFeatureParameters):
         return

     param.num = self.feature_extractor.num_features_non_numeric(param.suffix)

     if not isinstance(param.dim, Number):
         # Not a number: treated as the argument for load_word2vec
         embeddings = load_word2vec(param.dim)
         # Draw the unknown-value vector before building the lookup table
         unknown_vector = Config().random.normal(size=embeddings.vector_size)
         param.dim = embeddings.vector_size
         lookup = {word: embeddings[word] for word in embeddings.vocab}
         param.data = UnknownDict(lookup, unknown_vector)
         return

     # Dimension given as a number: vectors are generated lazily on first access
     def random_vector(d=param.dim):  # bind the dimension at definition time
         return Config().random.normal(size=d)

     param.data = defaultdict(random_vector)
     param.data[UnknownDict.UNKNOWN]  # touching the key creates the unknown value
Example #2
0
def main():
    """Sample random hyperparameter settings, run each one and log results to CSV.

    Environment variables read:

    * ``PARAMS_FILE`` -- path of the CSV file to write (default ``"params.csv"``).
    * ``PARAMS_NUM`` -- number of settings to sample (default ``30``).
    * every variable whose name starts with ``W2V_FILE`` -- its value is passed
      to ``load_word2vec`` and the result is added to the "wordvectors" domain.

    The CSV file is created with a header row first, then one row is appended
    after each setting has been run. After every run the best-scoring setting
    according to ``Params.score`` is printed.
    """
    Config().args.nowrite = True
    out_file = os.environ.get("PARAMS_FILE", "params.csv")
    w2v_files = [os.environ[f] for f in os.environ if f.startswith("W2V_FILE")]
    num = int(os.environ.get("PARAMS_NUM", 30))
    np.random.seed()
    # Each entry is (name, domain). Values are drawn with np.random.choice, so
    # repeating an element in a domain makes it proportionally more likely, and
    # an integer domain means "draw from range(domain)".
    domains = (
        ("seed", 2147483647),
        ("classifier",      100 * [config.FEEDFORWARD_NN] + list(config.CLASSIFIERS)),
        ("wordvectors",     [50, 100, 200, 300] + [load_word2vec(f) for f in w2v_files]),
        ("tagdim",          (5, 10, 20)),
        ("labeldim",        (5, 10, 20)),
        ("punctdim",        (1, 2, 3)),
        ("gapdim",          (1, 2, 3)),
        ("actiondim",       (3, 5, 10)),
        ("layerdim",        (50, 100, 200, 300, 500, 1000)),
        ("layers",          [1] + 5 * [2]),
        ("activation",      config.ACTIVATIONS),
        ("init",            5 * [config.INITIALIZATIONS[0]] + list(config.INITIALIZATIONS)),
        ("batchsize",       (10, 30, 50, 100, 200, 500)),
        ("minibatchsize",   (50, 100, 200, 300, 500, 1000)),
        ("nbepochs",        range(10, 51)),
        ("optimizer",       10 * [config.OPTIMIZERS[0]] + list(config.OPTIMIZERS)),
        ("loss",            20 * [config.OBJECTIVES[0]] + list(config.OBJECTIVES)),
        ("importance",      (1, 2)),
        ("earlyupdate",     6 * [False] + [True]),
        ("iterations",      range(1, 21)),
        ("worddropout",     (0, .1, .2, .25, .3)),
        ("normalize",       (False, True)),
        ("regularizer",     [None] + 3 * [config.REGULARIZERS[-1]] + list(config.REGULARIZERS)),
        ("regularization",  (1e-7, 1e-6, 1e-5, 1e-4)),
        ("dropout",         (0, .1, .2, .3, .4, .5)),
    )
    # Draw `num` values per domain, then transpose so that each Params object
    # receives one (name, value) pair from every domain.
    params = [Params(OrderedDict(p))
              for p in zip(*[[(n, v) for v in np.random.choice(vs, num)] for n, vs in domains])]
    print("All parameter combinations to try:")
    print("\n".join(map(str, params)))
    print("Saving results to '%s'" % out_file)
    # newline="" lets the csv module control line endings itself; without it,
    # platforms that translate "\n" end up with blank rows between records.
    with open(out_file, "w", newline="") as f:
        csv.writer(f).writerow(params[0].get_field_titles())
    for param in params:
        param.run()
        # Append right after each run so finished results survive a later crash
        with open(out_file, "a", newline="") as f:
            csv.writer(f).writerow(param.get_fields())
        # NOTE(review): the maximum is taken over all settings, including ones
        # not run yet -- confirm that Params.score handles those.
        best = max(params, key=Params.score)
        print("Best parameters: %s" % best)
Example #3
0
 def init_data(self, param):
     """Fill in ``param.data`` (and ``param.num``) if it has not been set yet.

     Nothing happens when ``param.data`` is already set or when ``param`` is a
     ``NumericFeatureParameters`` instance.

     Otherwise ``param.num`` is taken from the feature extractor, and then:

     * if ``param.dim`` is a number, ``param.data`` becomes an empty
       ``DropoutDict`` limited to ``param.size`` entries;
     * otherwise ``param.dim`` is handed to ``load_word2vec``. When
       ``param.size`` is ``None`` or ``0`` it is set to the vocabulary length
       plus one; otherwise only the first ``param.size - 1`` vocabulary keys
       are kept. ``param.dim`` is replaced by the model's ``vector_size``,
       ``param.init`` becomes a 1-tuple holding the kept vectors stacked below
       their mean vector, and ``param.data`` becomes a ``DropoutDict`` keyed by
       the kept vocabulary.
     """
     skip = param.data is not None or isinstance(param, NumericFeatureParameters)
     if skip:
         return

     param.num = self.feature_extractor.num_features_non_numeric(param.suffix)

     if isinstance(param.dim, Number):
         # Plain dimension: start with no predefined keys
         param.data = DropoutDict(max_size=param.size, dropout=param.dropout)
         return

     # Not a number: treated as the argument for load_word2vec
     embeddings = load_word2vec(param.dim)
     words = embeddings.vocab
     size_unspecified = param.size is None or param.size == 0
     if size_unspecified:
         # One extra slot on top of the whole vocabulary
         param.size = len(embeddings.vocab) + 1
     else:
         # Keep one slot free, so take at most size - 1 vocabulary keys
         words = list(words)[:param.size - 1]
     param.dim = embeddings.vector_size

     vectors = np.array([embeddings[word] for word in words])
     mean_vector = vectors.mean(axis=0)
     # The mean vector goes first, followed by the vocabulary vectors
     stacked = np.vstack((mean_vector, vectors))
     param.init = (stacked,)
     param.data = DropoutDict(max_size=param.size, keys=words, dropout=param.dropout)