def progress(steps, elapsed):
    """Print a progress line for query processing and record run metadata.

    Parameters:
        steps: number of queries processed so far.
        elapsed: wall-clock seconds elapsed.

    NOTE(review): relies on module-level `queries` (the full query list) and
    `f` (an open file handle) -- confirm both are defined in the enclosing
    script.
    """
    # print() with a single argument is valid in both Python 2 and Python 3
    # (the original Py2-only print statement is not).
    print('{} of {} processed ({} s)'.format(steps, len(queries), elapsed))
    util.metadata('steps', steps)
    util.metadata('gb_used', util.gb_used())
    # flush so progress is visible immediately (e.g. under output redirection)
    sys.stdout.flush()
    f.flush()
def report(queries):
    """Record aggregate ranking metrics for a batch of scored queries.

    Reports the mean quantile and hits@10 (fraction of queries ranked in the
    top 10) via util.metadata, ignoring queries whose quantile is NaN.
    """
    # keep only queries with a computable quantile
    valid = [q for q in queries if not np.isnan(q.quantile)]

    util.metadata('mean_quantile', np.mean([q.quantile for q in valid]))

    # hits@10: 1.0 when the query's rank falls within the top 10, else 0.0
    hits = [
        1.0 if util.rank_from_quantile(q.quantile, q.num_candidates) <= 10 else 0.0
        for q in valid
    ]
    util.metadata('h10', np.mean(hits))
def track(self):
    """Poll every observer, log its metrics, and checkpoint the history.

    For each metric returned by an observer, appends (self.steps, value) to
    the corresponding series in ``self.history``, reports it via
    ``util.metadata``, prints a one-line summary, and pickles the full
    history to ``history.cpkl``.
    """
    report = []
    for observer in self.observers:
        metrics = observer.observe(self)
        if metrics is None:
            continue
        # .items() behaves the same on Python 2 and works on Python 3
        # (iteritems is Py2-only).
        for name, val in metrics.items():
            timestamps, values = self.history[name]
            timestamps.append(self.steps)
            values.append(val)
            util.metadata(name, val)
            report.append((name, val))
    if len(report) > 0:
        # metric names are tuples; join components with '.' for display
        print(', '.join('{}: {:.3f}'.format('.'.join(name), val)
                        for name, val in report))
    # pickle data is binary: 'wb' is required on Python 3 and correct
    # (no newline translation) on Python 2/Windows.
    with open('history.cpkl', 'wb') as f:
        pickle.dump(dict(self.history), f)
def track(self):
    """Poll every observer, log its metrics, and checkpoint the history.

    For each metric returned by an observer, appends (self.steps, value) to
    the corresponding series in ``self.history``, reports it via
    ``util.metadata``, prints a one-line summary, and pickles the full
    history to ``history.cpkl``.
    """
    report = []
    for observer in self.observers:
        metrics = observer.observe(self)
        if metrics is None:
            continue
        # .items() behaves the same on Python 2 and works on Python 3
        # (iteritems is Py2-only).
        for name, val in metrics.items():
            timestamps, values = self.history[name]
            timestamps.append(self.steps)
            values.append(val)
            util.metadata(name, val)
            report.append((name, val))
    if len(report) > 0:
        # metric names are tuples; join components with '.' for display
        print(", ".join("{}: {:.3f}".format(".".join(name), val)
                        for name, val in report))
    # pickle data is binary: 'wb' is required on Python 3 and correct
    # (no newline translation) on Python 2/Windows.
    with open("history.cpkl", "wb") as f:
        pickle.dump(dict(self.history), f)
def track(self):
    """Poll every observer, log its metrics, and checkpoint the history.

    Like the plain tracker, but timestamps are expressed in epochs
    (steps / len(train)), metadata is written under ``self.path``, and the
    history checkpoint goes to ``<self.path>/history.cpkl``.
    """
    report = []
    for observer in self.observers:
        metrics = observer.observe(self)
        if metrics is None:
            continue
        # .items() behaves the same on Python 2 and works on Python 3
        # (iteritems is Py2-only).
        for name, val in metrics.items():
            timestamps, values = self.history[name]
            # record time as fraction of the training set processed (epochs)
            timestamps.append(self.steps / float(len(self.train)))
            values.append(val)
            util.metadata(name, val, self.path)
            report.append((name, val))
    if len(report) > 0:
        # metric names are tuples; join components with '.' for display
        print(', '.join('{}: {:.3f}'.format('.'.join(name), val)
                        for name, val in report))
        sys.stdout.flush()
    # pickle data is binary: 'wb' is required on Python 3 and correct
    # (no newline translation) on Python 2/Windows.
    with open(join(self.path, 'history.cpkl'), 'wb') as f:
        pickle.dump(dict(self.history), f)
def control(self, experiment):
    """Periodic controller hook for an experiment loop.

    Halts the experiment once ``max_epochs`` is reached; every
    ``report_wait`` steps writes progress/liveness/memory metadata under
    ``self.path``; every ``save_wait`` steps (except step 0) saves the model.

    Parameters:
        experiment: object exposing ``epochs``, ``steps``, ``halt`` and
            ``model`` (with ``save_model``).
    """
    if experiment.epochs >= self.max_epochs:
        print('Halted after reaching max epochs.')
        experiment.halt = True

    if experiment.steps % self.report_wait == 0:
        print('steps: {}, epochs: {:.2f}'.format(experiment.steps,
                                                 experiment.epochs))
        util.metadata('steps', experiment.steps, self.path)
        util.metadata('epochs', experiment.epochs, self.path)
        # report last seen
        time_rep = datetime.now().strftime('%H:%M:%S %m/%d')
        util.metadata('last_seen', time_rep, self.path)
        # report memory used
        util.metadata('gb_used', util.gb_used(), self.path)

    if experiment.steps % self.save_wait == 0 and experiment.steps != 0:
        print('saving params...')
        experiment.model.save_model(self.path)
def control(self, maximizer):
    """Periodic controller hook for an optimization loop.

    Halts once ``max_steps`` is reached; every ``report_wait`` steps reports
    progress (steps, epochs, liveness, memory); every ``save_wait`` steps
    (except step 0) pickles the current parameters to ``params.cpkl``.

    Parameters:
        maximizer: object exposing ``steps``, ``batch_size``, ``train``,
            ``halt`` and ``params`` (with ``as_dict``).
    """
    if maximizer.steps >= self.max_steps:
        print('Halted after reaching max steps.')
        maximizer.halt = True

    if maximizer.steps % self.report_wait == 0:
        # epochs = examples consumed / size of the training set
        epochs = float(maximizer.steps * maximizer.batch_size) / len(maximizer.train)
        print('steps: {}, epochs: {:.2f}'.format(maximizer.steps, epochs))
        util.metadata('steps', maximizer.steps)
        util.metadata('epochs', epochs)
        # report last seen
        time_rep = datetime.now().strftime('%H:%M:%S %m/%d')
        util.metadata('last_seen', time_rep)
        # report memory used
        util.metadata('gb_used', util.gb_used())

    if maximizer.steps % self.save_wait == 0 and maximizer.steps != 0:
        print('saving params...')
        # pickle data is binary: 'wb' is required on Python 3 and correct
        # (no newline translation) on Python 2/Windows.
        with open('params.cpkl', 'wb') as f:
            # convert params to picklable format
            params = SparseVector(maximizer.params.as_dict())
            pickle.dump(params, f)
def control(self, maximizer):
    """Periodic controller hook for an optimization loop.

    Halts once ``max_steps`` is reached; every ``report_wait`` steps reports
    progress (steps, epochs, liveness, memory); every ``save_wait`` steps
    (except step 0) pickles the current parameters to ``params.cpkl``.

    Parameters:
        maximizer: object exposing ``steps``, ``batch_size``, ``train``,
            ``halt`` and ``params`` (with ``as_dict``).
    """
    if maximizer.steps >= self.max_steps:
        print("Halted after reaching max steps.")
        maximizer.halt = True

    if maximizer.steps % self.report_wait == 0:
        # epochs = examples consumed / size of the training set
        epochs = float(maximizer.steps * maximizer.batch_size) / len(maximizer.train)
        print("steps: {}, epochs: {:.2f}".format(maximizer.steps, epochs))
        util.metadata("steps", maximizer.steps)
        util.metadata("epochs", epochs)
        # report last seen
        time_rep = datetime.now().strftime("%H:%M:%S %m/%d")
        util.metadata("last_seen", time_rep)
        # report memory used
        util.metadata("gb_used", util.gb_used())

    if maximizer.steps % self.save_wait == 0 and maximizer.steps != 0:
        print("saving params...")
        # pickle data is binary: 'wb' is required on Python 3 and correct
        # (no newline translation) on Python 2/Windows.
        with open("params.cpkl", "wb") as f:
            # convert params to picklable format
            params = SparseVector(maximizer.params.as_dict())
            pickle.dump(params, f)
def parse_dataset(data_path, dev_mode=False, maximum_examples=float('inf')):
    """Load a path-query dataset and its associated knowledge graphs.

    Reads the ``train`` split plus the evaluation splits (``dev`` when
    ``dev_mode`` is True, otherwise ``test``, including the ``_deduction``
    and ``_induction`` variants), collects the entity and relation
    vocabularies, and builds a train-only graph and a full (train + eval)
    graph.

    Parameters:
        data_path: dataset directory, relative to the module-level
            ``data_directory``.
        dev_mode: evaluate on the dev set instead of the test set.
        maximum_examples: cap on the number of positive examples loaded
            per split (default: unlimited).

    Returns:
        util.Bunch with the splits, ``entity_list``, ``relations_list``,
        ``train_graph`` and ``full_graph``.
    """
    data_path = join(data_directory, data_path)
    entities = set()
    relations = set()

    def get_examples(name):
        # Load up to maximum_examples positive PathQuery examples from one
        # split file; also accumulates entities/relations as a side effect.
        filename = join(data_path, name)
        if not isfile(filename):
            # single formatted string (the original multi-arg print
            # statement produced doubled spaces around the filename)
            print('Warning: {} not found. Skipping...'.format(filename))
            return None
        examples_arr = []
        with open(filename, 'r') as f:
            num_examples = 0
            for line in util.verboserate(f):
                if num_examples >= maximum_examples:
                    break
                items = line.split()
                s, path, t = items[:3]
                rels = tuple(path.split(','))
                entities.add(s)
                entities.add(t)
                relations.update(rels)
                if len(items) >= 4:
                    label = items[3]
                else:
                    label = '1'  # if no label, assume positive
                # only add positive examples; the cap counts added examples
                if label == '1':
                    examples_arr.append(PathQuery(s, rels, t))
                    num_examples += 1
        return examples_arr

    def get_triples(queries):
        # Convert single-relation queries into (subject, relation, object)
        # triples; multi-hop queries are skipped.
        triples_arr = []
        for query in queries:
            if len(query.r) == 1:
                triples_arr.append((query.s, str(query.r[0]), query.t))
        return triples_arr

    # add datasets
    print('loading dataset: {}'.format(data_path))
    attributes = {}

    # use the dev set or the test set
    split = 'dev' if dev_mode else 'test'
    util.metadata('split', split)
    print('Evaluating on {} set.'.format(split.upper()))

    for name in ['train', 'test', 'test_deduction', 'test_induction']:
        attributes[name] = get_examples(name.replace('test', split))

    attributes['entity_list'] = list(entities)
    attributes['relations_list'] = list(relations)

    # add graphs
    triples = {}
    for name in ['train', 'test']:
        triples[name] = get_triples(attributes[name])
    attributes['train_graph'] = Graph(triples['train'])
    attributes['full_graph'] = Graph(triples['train'] + triples['test'])

    return util.Bunch(**attributes)
# NOTE(review): whitespace-mangled Python 2 script fragment (cPickle, print
# statements elsewhere in this file). The trailing `def score` is truncated
# mid-definition at `except KeyError:` -- its handler body lies outside this
# view, so the fragment is left byte-identical rather than reconstructed.
import cPickle # In[ ]: parser = argparse.ArgumentParser() parser.add_argument('dataset') parser.add_argument('model') parser.add_argument('params') if util.in_ipython(): args = parser.parse_args(['freebase_socher', 'bilinear', 'compositional_wvec_bilinear_freebase_socher_0xe7d4cf_params.cpkl']) else: args = parser.parse_args() util.metadata('dataset', args.dataset) util.metadata('model', args.model) util.metadata('params', args.params) util.metadata('split', 'test') model = CompositionalModel(None, path_model=args.model, objective='margin') params = load_params(args.params, args.model) dev = dns.load_socher_test(join(args.dataset, 'dev')) test = dns.load_socher_test(join(args.dataset, 'test')) def score(samples): for ex in samples: try: ex.score = model.predict(params, ex).ravel()[0] except KeyError:
# NOTE(review): whitespace-mangled duplicate of the scoring-script fragment
# above, with reflowed argument lists. The trailing `def score` is again
# truncated (inside its `try:` body, before the except handler), so the
# fragment is left byte-identical rather than reconstructed.
# In[ ]: parser = argparse.ArgumentParser() parser.add_argument('dataset') parser.add_argument('model') parser.add_argument('params') if util.in_ipython(): args = parser.parse_args([ 'freebase_socher', 'bilinear', 'compositional_wvec_bilinear_freebase_socher_0xe7d4cf_params.cpkl' ]) else: args = parser.parse_args() util.metadata('dataset', args.dataset) util.metadata('model', args.model) util.metadata('params', args.params) util.metadata('split', 'test') model = CompositionalModel(None, path_model=args.model, objective='margin') params = load_params(args.params, args.model) dev = dns.load_socher_test(join(args.dataset, 'dev')) test = dns.load_socher_test(join(args.dataset, 'test')) def score(samples): for ex in samples: try: ex.score = model.predict(params, ex).ravel()[0]
def report(queries):
    """Record aggregate ranking metrics for a batch of scored queries.

    Logs the mean quantile and hits@10 via util.metadata; queries whose
    quantile is NaN are excluded from both statistics.
    """
    # filter out NaNs
    scored = [q for q in queries if not np.isnan(q.quantile)]

    quantiles = [q.quantile for q in scored]
    util.metadata('mean_quantile', np.mean(quantiles))

    # 1.0 for each query ranked within the top 10, else 0.0
    in_top_ten = []
    for q in scored:
        rank = util.rank_from_quantile(q.quantile, q.num_candidates)
        in_top_ten.append(1.0 if rank <= 10 else 0.0)
    util.metadata('h10', np.mean(in_top_ten))