Example #1
def progress(steps, elapsed):
    # periodic progress report; `queries` and the log file `f` come from the enclosing scope
    print '{} of {} processed ({} s)'.format(steps, len(queries), elapsed)
    util.metadata('steps', steps)
    util.metadata('gb_used', util.gb_used())
    sys.stdout.flush()
    f.flush()
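Every example on this page reports scalars through util.metadata(name, value[, path]). The helper itself is not shown in any of these snippets; a minimal sketch, assuming it maintains a meta.json file of key/value pairs in the current (or given) run directory, might look like this:

import json
import os


def metadata(name, value, path='.'):
    # read the existing metadata file if present, update one key, and write it back
    meta_file = os.path.join(path, 'meta.json')
    meta = {}
    if os.path.isfile(meta_file):
        with open(meta_file) as f:
            meta = json.load(f)
    meta[name] = value
    with open(meta_file, 'w') as f:
        json.dump(meta, f, indent=2)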
Example #2
def report(queries):
    # drop queries whose quantile could not be computed
    queries = [q for q in queries if not np.isnan(q.quantile)]
    util.metadata('mean_quantile', np.mean([q.quantile for q in queries]))
    # hits@10: fraction of queries whose correct answer ranks in the top 10
    util.metadata('h10', np.mean([
        1.0 if util.rank_from_quantile(q.quantile, q.num_candidates) <= 10 else 0.0
        for q in queries
    ]))
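Here h10 is hits@10: the fraction of queries whose correct answer ranks among the top 10 candidates. util.rank_from_quantile is not shown; a plausible sketch, assuming the convention that a quantile of 1.0 means the correct answer outranks every other candidate, is:

def rank_from_quantile(quantile, num_candidates):
    # quantile = fraction of candidates ranked below the correct answer,
    # so quantile 1.0 maps to rank 1 (this convention is an assumption)
    return 1 + (1.0 - quantile) * (num_candidates - 1)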
    def track(self):
        report = []
        for observer in self.observers:
            metrics = observer.observe(self)
            if metrics is None:
                continue
            for name, val in metrics.iteritems():
                # append to the per-metric history of (timestamps, values)
                timestamps, values = self.history[name]
                timestamps.append(self.steps)
                values.append(val)

                util.metadata(name, val)
                report.append((name, val))

        if len(report) > 0:
            # metric names are tuples of strings; join them with dots for display
            print ', '.join(['{}: {:.3f}'.format('.'.join(name), val) for name, val in report])
            with open('history.cpkl', 'w') as f:
                pickle.dump(dict(self.history), f)
    def track(self):
        report = []
        for observer in self.observers:
            metrics = observer.observe(self)
            if metrics is None:
                continue
            for name, val in metrics.iteritems():
                timestamps, values = self.history[name]
                timestamps.append(self.steps / float(len(self.train)))
                values.append(val)

                util.metadata(name, val, self.path)
                report.append((name, val))

        if len(report) > 0:
            print ', '.join(['{}: {:.3f}'.format('.'.join(name), val)
                            for name, val in report])
            sys.stdout.flush()
            with open(join(self.path, 'history.cpkl'), 'w') as f:
                pickle.dump(dict(self.history), f)
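Both track variants unpack self.history[name] into parallel timestamp and value lists, which suggests that history is a defaultdict keyed by metric name. A minimal sketch of that setup (the class name and constructor arguments are assumptions, not taken from the original code):

from collections import defaultdict


class Tracker(object):
    def __init__(self, observers, train, path='.'):
        self.observers = observers
        self.train = train
        self.path = path
        self.steps = 0
        # each metric name maps to a pair of parallel lists: (timestamps, values)
        self.history = defaultdict(lambda: ([], []))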
Example #6
    def control(self, experiment):
        if experiment.epochs >= self.max_epochs:
            print 'Halted after reaching max epochs.'
            experiment.halt = True

        if experiment.steps % self.report_wait == 0:
            print 'steps: {}, epochs: {:.2f}'.format(experiment.steps,
                                                     experiment.epochs)
            util.metadata('steps', experiment.steps, self.path)
            util.metadata('epochs', experiment.epochs, self.path)

            # report last seen
            time_rep = datetime.now().strftime('%H:%M:%S %m/%d')
            util.metadata('last_seen', time_rep, self.path)

            # report memory used
            util.metadata('gb_used', util.gb_used(), self.path)

        if experiment.steps % self.save_wait == 0 and experiment.steps != 0:
            print 'saving params...'
            experiment.model.save_model(self.path)
    def control(self, maximizer):
        if maximizer.steps >= self.max_steps:
            print 'Halted after reaching max steps.'
            maximizer.halt = True

        if maximizer.steps % self.report_wait == 0:
            epochs = float(maximizer.steps * maximizer.batch_size) / len(maximizer.train)
            print 'steps: {}, epochs: {:.2f}'.format(maximizer.steps, epochs)
            util.metadata('steps', maximizer.steps)
            util.metadata('epochs', epochs)

            # report last seen
            time_rep = datetime.now().strftime('%H:%M:%S %m/%d')
            util.metadata('last_seen', time_rep)

            # report memory used
            util.metadata('gb_used', util.gb_used())

        if maximizer.steps % self.save_wait == 0 and maximizer.steps != 0:
            print 'saving params...'
            with open('params.cpkl', 'w') as f:
                # convert params to picklable format
                params = SparseVector(maximizer.params.as_dict())
                pickle.dump(params, f)
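Both controllers also record util.gb_used(). Assuming it reports the resident memory of the current process, a sketch built on psutil (an assumption about the helper, not its actual implementation) could be:

import os

import psutil


def gb_used():
    # resident set size of this process, in gigabytes
    return psutil.Process(os.getpid()).memory_info().rss / float(1 << 30)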
def parse_dataset(data_path, dev_mode=False, maximum_examples=float('inf')):
    data_path = join(data_directory, data_path)

    entities = set()
    relations = set()

    def get_examples(name):
        filename = join(data_path, name)
        if not isfile(filename):
            print 'Warning: ', filename, ' not found. Skipping...'
            return None

        examples_arr = list()
        with open(filename, 'r') as f:
            num_examples = 0
            for line in util.verboserate(f):
                if num_examples >= maximum_examples:
                    break
                items = line.split()
                s, path, t = items[:3]
                rels = tuple(path.split(','))
                entities.add(s)
                entities.add(t)
                relations.update(rels)

                if len(items) >= 4:
                    label = items[3]
                else:
                    label = '1'  # if no label, assume positive

                # only add positive examples
                if label == '1':
                    examples_arr.append(PathQuery(s, rels, t))
                    num_examples += 1

        return examples_arr

    def get_triples(queries):
        triples_arr = list()
        for query in queries:
            if len(query.r) == 1:
                triples_arr.append((query.s, str(query.r[0]), query.t))
        return triples_arr

    # add datasets
    print 'loading dataset:', data_path

    attributes = {}

    # use the dev set or the test set
    split = 'dev' if dev_mode else 'test'
    util.metadata('split', split)

    print 'Evaluating on {} set.'.format(split.upper())

    for name in ['train', 'test', 'test_deduction', 'test_induction']:
        attributes[name] = get_examples(name.replace('test', split))

    attributes['entity_list'] = list(entities)
    attributes['relations_list'] = list(relations)

    # add graphs
    triples = {}
    for name in ['train', 'test']:
        triples[name] = get_triples(attributes[name])
    attributes['train_graph'] = Graph(triples['train'])
    attributes['full_graph'] = Graph(triples['train'] + triples['test'])

    return util.Bunch(**attributes)
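parse_dataset returns a util.Bunch whose train/test attributes hold the positive PathQuery examples and whose graphs are built from the length-1 relation paths. A typical call might look like this (the directory name is borrowed from the evaluation script below; the printed counts depend on the data):

dset = parse_dataset('freebase_socher', dev_mode=True, maximum_examples=10000)
print '{} training path queries'.format(len(dset.train))
print '{} entities, {} relations'.format(len(dset.entity_list), len(dset.relations_list))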
import cPickle


# In[ ]:

parser = argparse.ArgumentParser()
parser.add_argument('dataset')
parser.add_argument('model')
parser.add_argument('params')

if util.in_ipython():
    args = parser.parse_args(['freebase_socher', 'bilinear', 'compositional_wvec_bilinear_freebase_socher_0xe7d4cf_params.cpkl'])
else:
    args = parser.parse_args()

util.metadata('dataset', args.dataset)
util.metadata('model', args.model)
util.metadata('params', args.params)
util.metadata('split', 'test')

model = CompositionalModel(None, path_model=args.model, objective='margin')
params = load_params(args.params, args.model)

dev = dns.load_socher_test(join(args.dataset, 'dev'))
test = dns.load_socher_test(join(args.dataset, 'test'))

def score(samples):
    for ex in samples:
        try:
            ex.score = model.predict(params, ex).ravel()[0]
        except KeyError:
            # assumed fallback: examples with entities/relations missing from params get NaN scores
            ex.score = float('nan')