Example #1
def upload_box(mail, api, cmax=None):
  result, data = mail.search(None, "ALL")
  if result == 'OK':
    ids = data[0].split() # data[0] is a space-separated byte string of message IDs.
    ids.reverse()
    if cmax is None:
      cmax = len(ids)
    else:
      cmax = min(cmax, len(ids))
    i = 0
    tfetch = 0
    tupload = 0
    print('0.0 %', end='')
    for id in ids[:cmax]:
      # Fetch raw message.
      (result, data), elapsed = timed(mail.fetch, id, "(RFC822)") # fetch the email body (RFC822) for the given ID.
      tfetch += elapsed
      # Encode.
      raw = data[0][1] # Raw mail, in bytes.
      data, cte = _decode(raw)
      # Upload into API.
      try:
        result, elapsed = timed(api.message_insert, 'media', data, cte)
        tupload += elapsed
      except ApiError as err:
        print('\nFailed to upload one message: {}\n'.format(err))
      # Progress.
      i = i+1
      percent = i * 100 / cmax
      print('\r\033[0K{0:.2f} %'.format(percent), end='')
    print('\r\033[0KDone.\n-- fetch time: {}\n-- upload time {}'.format(tfetch, tupload))
  else:
    print('Could not access the mail box: {}'.format(result))
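Example #1 unpacks (result, data), elapsed = timed(mail.fetch, id, "(RFC822)"), so it assumes a timed helper that calls a function and returns the function's result together with the elapsed time. The helper itself is not shown on this page; the following is a minimal sketch of that contract, not the original implementation:

import time

def timed(func, *args, **kwargs):
    # Call func and measure how long the call takes.
    start = time.perf_counter()
    result = func(*args, **kwargs)
    elapsed = time.perf_counter() - start
    # Matches the unpacking used in Example #1: result first, elapsed second.
    return result, elapsed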
Example #2
    def run_guarded(self, context):
        period = context.period

        if config.log_level == "processes":
            print()

        try:
            for k, v in self.subprocesses:
                if config.log_level == "processes":
                    print("    *", end=' ')
                    if k is not None:
                        print(k, end=' ')
                    utils.timed(v.run_guarded, context)
                else:
                    v.run_guarded(context)
                    #            print "done."
                context.simulation.start_console(context)
        finally:
            if config.autodump is not None:
                self._autodump(context)

            if config.autodiff is not None:
                self._autodiff(period)

            if self.purge:
                self.entity.purge_locals()
Example #3
def test_graph_components():
    @tf.function
    def comps(spins, iters):
        return gis.largest_cluster(spins, max_iters=iters)

    graphs = [
        nx.Graph([(0, 1), (0, 2), (1, 2), (2, 3), (3, 6),
                  (5, 4)]),  # comp sizes 2, 5
        nx.Graph([(0, 1), (0, 2), (1, 2), (3, 6),
                  (5, 4)]),  # comp sizes 2, 2, 3
        nx.Graph([(0, 1), (0, 2), (1, 2), (2, 5), (3, 4),
                  (5, 4)]),  # comp sizes 6, fewer vertices
    ]
    tfgs = [TFGraph(g) for g in graphs]
    gis = GraphIsing(tfgs, 10, 10)
    s0 = gis.initial_spins(1.0)

    with timed('direct'):
        assert (gis.largest_cluster(s0).numpy() == [5, 3, 6]).all()
    with timed('comps(3) #1'):
        assert (comps(s0, tf.constant(3)).numpy() == [5, 3, 6]).all()
    with timed('comps(16) #1'):
        assert (comps(s0, tf.constant(16)).numpy() == [5, 3, 6]).all()
    with timed('comps(2) #1'):
        assert (comps(s0, tf.constant(2)).numpy() == [3, 3, 6]).all()
    with timed('comps(2) #2'):
        assert (comps(s0, tf.constant(2)).numpy() == [3, 3, 6]).all()

    gis.log_metrics()
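Examples #3, #7, and several others use timed as a context manager that labels a block and reports its duration (with timed('direct'): ...). Assuming the context manager only needs to print the label and the elapsed time, a minimal sketch could look like this; extra arguments seen elsewhere on this page (e.g. skip='' in Example #5) are project-specific and omitted:

import time
from contextlib import contextmanager

@contextmanager
def timed(label):
    # Print "<label>: <seconds>" when the block exits, even on error.
    start = time.perf_counter()
    try:
        yield
    finally:
        print('{}: {:.3f} s'.format(label, time.perf_counter() - start))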
Example #5
 def sgd(self, reg=.9, lr_init=1., step=2, n_iters=300000, val_interval=5000):
     """
     When using Gibbs approximation and the current y is passed to the gradient
     function, this performs CD-k (i.e., starting the Gibbs sampler from the
     current y and sampling one example after a burn-in of k steps, typically 1).
     """
     print '[START] SML/SGD Training\n\nTR/VAL/TE SIZES: %s\n' % self.crf.Ns
     grad = self.regularize(reg)[1] if reg > 0 else self.grad
     self.val_loss, self.Ws_val, lr = [], [], lr_init
     with timed('SML/SGD', self):
         try:
             for i in xrange(1, n_iters+1):
                 r = randint(0, self.crf.N_tr - 1)
                 g = grad(self.W_opt, self.crf.X[r], self.crf.Y[r])
                 self.W_opt -= lr * g
                 print 'Iteration #%s: lr=%s, |grad|=%s' % (i, lr, np.linalg.norm(g))
                 if step:
                     lr = lr_init * np.power(.1, np.floor(i * (step+1) / n_iters))
                 if i % val_interval == 0:
                     print '\nCurrent norm: |W| = %s' % np.linalg.norm(self.W_opt)
                     Ws = self.crf.split_W(self.W_opt)
                     with timed('Validation Iter (MAP predict)', skip=''):
                         loss = self.ev(self.crf.Y_v,
                             [self.crf.MAP(x,Ws) for x in self.crf.X_v])
                     print '\tVAL LOSS: %s\n' % self.ev.get_names(loss)
                     self.val_loss.append(loss)
                     self.Ws_val.append(np.array(self.W_opt))
         except KeyboardInterrupt:
             print '\nINFO - Manually exited train loop at Iteration %s' % i
     return self.W_opt, self.val_loss, self.Ws_val
Example #6
        def simulate_period(period_idx, period, processes, entities,
                            init=False):
            print("\nperiod", period)
            if init:
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                print("- loading input data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                # build context for this period:
                const_dict = {'__simulation__': self,
                              'period': period,
                              'nan': float('nan'),
                              '__globals__': globals_data}

                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity = process_def

                    print("- %d/%d" % (p_num, num_processes), process.name,
                          end=' ')
                    print("...", end=' ')
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, self,
                                             const_dict)
                    else:
                        elapsed = 0
                        print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                    self.start_console(process.entity, period,
                                       globals_data)

            print("- storing period data")
            for entity in entities:
                print("  *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print("    -> %d individuals" % len(entity.array))
#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)
Example #7
def preprocess(experiment, dataset):
    with timed("Preprocessing training data"):
        train_set = list(
            zip(transpose(normalise(pad(dataset['train']['data'], 4))),
                dataset['train']['labels']))
    with timed("Preprocessing test data"):
        test_set = list(
            zip(transpose(normalise(dataset['test']['data'])),
                dataset['test']['labels']))

    return train_set, test_set
Example #8
        def simulate_period(period_idx, period, processes, entities, init=False):
            print "\nperiod", period
            if init:
                for entity in entities:
                    print "  * %s: %d individuals" % (entity.name, len(entity.array))
            else:
                print "- loading input data"
                for entity in entities:
                    print "  *", entity.name, "...",
                    timed(entity.load_period_data, period)
                    print "    -> %d individuals" % len(entity.array)
            for entity in entities:
                entity.array_period = period
                entity.array["period"] = period

            if processes:
                # build context for this period:
                const_dict = {"period": period, "nan": float("nan"), "__globals__": globals_data}

                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity = process_def

                    print "- %d/%d" % (p_num, num_processes), process.name,
                    # TODO: provide a custom __str__ method for Process &
                    # Assignment instead
                    if hasattr(process, "predictor") and process.predictor and process.predictor != process.name:
                        print "(%s)" % process.predictor,
                    print "...",
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, self, const_dict)
                    else:
                        elapsed = 0
                        print "skipped (periodicity)"

                    process_time[process.name] += elapsed
                    if config.show_timings:
                        print "done (%s elapsed)." % time2str(elapsed)
                    else:
                        print "done."
                    self.start_console(process.entity, period, globals_data)

            print "- storing period data"
            for entity in entities:
                print "  *", entity.name, "...",
                timed(entity.store_period_data, period)
                print "    -> %d individuals" % len(entity.array)
            #            print " - compressing period data"
            #            for entity in entities:
            #                print "  *", entity.name, "...",
            #                for level in range(1, 10, 2):
            #                    print "   %d:" % level,
            #                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array) for entity in entities)
Example #9
 def run_guarded(self, simulation, const_dict):
     print
     for k, v in self.subprocesses:
         print "    *",
         if k is not None:
             print k,
         utils.timed(v.run_guarded, simulation, const_dict)
         #            print "done."
         simulation.start_console(v.entity, const_dict["period"], const_dict["__globals__"])
     # purge all local variables
     temp_vars = self.entity.temp_variables
     all_vars = self.entity.variables
     local_vars = set(temp_vars.keys()) - set(all_vars.keys())
     for var in local_vars:
         del temp_vars[var]
Example #10
File: simple.py Project: deti/boss
    def aggregate(self, report_id):
        logbook.info("Get customer usage aggregation for {}", report_id)
        customer = Customer.get_by_id(report_id.customer_id)
        if not customer:
            raise Exception("Customer %s not found" % report_id.customer_id)

        with timed("get_usage simple"):
            aggregated_usage = ServiceUsage.get_usage(customer, report_id.start, report_id.end)

        tariffs = {}
        services = set()
        for usage in aggregated_usage:
            service_id, tariff_id, cost, usage_volume = usage
            services.add(service_id)
            if not tariff_id:
                logbook.error("ServiceUsage {} is not completed. Tariff is not filled", usage)
                continue
            tariff = Tariff.get_by_id(tariff_id)
            tariff_report = tariffs.get(tariff_id)
            if tariff_report is None:
                tariff_report = self.tariff_report_type(tariff, customer)
                tariffs[tariff_id] = tariff_report

            tariff_report.add_usage(usage)

        total = Counter()
        for tariff_id, tariff in tariffs.items():
            total_tariff, currency = tariff.aggregate()
            total[currency] += total_tariff

        for t, value in total.items():
            total[t] = decimal_to_string(value)

        logbook.info("Aggregated {} for {}. Services: {}", total, customer, services)
        return self.prepare_result(list(tariffs.values()), total, customer, report_id.start, report_id.end)
Example #11
    def aggregate(self, report_id):
        logbook.info("Get detailed customer usage aggregation for {}",
                     report_id)

        customer = Customer.get_by_id(report_id.customer_id)
        if not customer:
            raise Exception("Customer %s not found" % report_id.customer_id)

        with timed("get_usage simple"):
            aggregated_usage = ServiceUsage.get_detailed_usage(
                customer, report_id.start, report_id.end)

        tariffs = {}
        services = set()
        for usage in aggregated_usage:
            tariff = Tariff.get_by_id(usage.tariff_id)
            tariff_report = tariffs.get(usage.tariff_id)
            if tariff_report is None:
                tariff_report = self.tariff_report_type(tariff, customer)
                tariffs[usage.tariff_id] = tariff_report

            tariff_report.add_usage(usage)

        total = Counter()
        for tariff_id, tariff in tariffs.items():
            total_tariff, currency = tariff.aggregate()
            total[currency] += total_tariff

        for t, value in total.items():
            total[t] = decimal_to_string(value)

        logbook.info("Aggregated {} for {}. Services: {}", total, customer,
                     services)
        return self.prepare_result(list(tariffs.values()), total, customer,
                                   report_id.start, report_id.end)
Example #12
 def _build_model(self, device=None, batch_size=None):
     with timed("\nBuilding model..."):
         net = vars(models)[self.config.net]
         model = Network(union(net(), self.config.losses))
         if device:
             print(f"Transferring model to device gpu:{device}")
             model = model.to(device)
     return model
Example #13
 def val(W):
     print 'current W - obj: %s, norm: %s' % (obj(W), np.linalg.norm(W))
     Ws = self.crf.split_W(W)
     with timed('Validation (MAP predict)', skip=''):
         loss = self.ev(self.crf.Y_v, [self.crf.MAP(x, Ws) for x in self.crf.X_v])
     print '\tVAL LOSS: %s\n' % self.ev.get_names(loss)
     self.val_loss.append(loss)
     self.Ws_val.append(np.array(W))
Example #14
    def _preprocess(self, dataset):
        with timed("\nPreprocessing training data"):
            train_data, train_labels = dataset['train']['data'], dataset[
                'train']['labels']
            if self.config.train_preprocessors:
                train_data = toolz.pipe(train_data,
                                        *self.config.train_preprocessors)
            train_set = list(zip(train_data, train_labels))

        with timed("Preprocessing test data"):
            test_data, test_labels = dataset['test']['data'], dataset['test'][
                'labels']

            if self.config.test_preprocessors:
                test_data = toolz.pipe(test_data,
                                       *self.config.test_preprocessors)
            test_set = list(zip(test_data, test_labels))

        return train_set, test_set
Example #15
def test_bench():
    with tf.device("/device:CPU:0"):
        N = 1000
        K = 1000
        g = nx.random_graphs.powerlaw_cluster_graph(N, 5, 0.5)
        tfg = TFGraph(g)
        gis = GraphIsing([tfg] * K)
        s0 = gis.initial_spins(-1.0)
        s1 = gis.initial_spins(1.0)
        print("Graphs: {} powerlaw graphs, {} nodes ({} tot nodes)".format(
            K, N, N * K))

        @tf.function
        def repeat_update(spins, iters):
            for i in range(iters):
                spins = gis.update(spins, 0.5)
            return spins

        @tf.function
        def repeat_components(spins):
            return gis.largest_cluster(spins)

        @tf.function
        def repeat_sampled_components(spins, samples):
            return gis.sampled_largest_cluster(spins, samples=samples)

        with timed('warmup'):
            repeat_update(s0, tf.constant(1))
        with timed('run 100x updates #1'):
            repeat_update(s0, tf.constant(100))
        with timed('run 100x updates #2'):
            repeat_update(s0, tf.constant(100))

        with timed('warmup'):
            repeat_components(s1)
        with timed('run 1x components #1'):
            repeat_components(s1)
        with timed('run 1x components #2'):
            repeat_components(s1)

        with timed('warmup'):
            repeat_sampled_components(s1, tf.constant(1))
        with timed('run 1x sampled components (10 samples) #1'):
            repeat_sampled_components(s1, tf.constant(10))
        with timed('run 1x sampled components (10 samples) #2'):
            repeat_sampled_components(s1, tf.constant(10))

        gis.log_metrics()
Example #16
    def run_guarded(self, simulation, const_dict):
        period = const_dict['period']

        print()
        for k, v in self.subprocesses:
            print("    *", end=' ')
            if k is not None:
                print(k, end=' ')
            utils.timed(v.run_guarded, simulation, const_dict)
#            print "done."
            simulation.start_console(v.entity, period,
                                     const_dict['__globals__'])
        if config.autodump is not None:
            self._autodump(period)

        if config.autodiff is not None:
            self._autodiff(period)

        if self.purge:
            self.entity.purge_locals()
Example #17
    def run_guarded(self, simulation, const_dict):
        global max_vars
        
        periods = const_dict['periods']
        idx = const_dict['period_idx']
        period = periods[idx]
        
        print()
        for k, v in self.subprocesses:
#             print("    *", end=' ')
            if k is not None:
                print(k, end=' ')
            utils.timed(v.run_guarded, simulation, const_dict)
#            print "done."
            simulation.start_console(v.entity, period,
                                     const_dict['__globals__'])
        if config.autodump is not None:
            self._autodump(period)

        if config.autodiff is not None:
            self._autodiff(period)

        # purge all local variables
        temp_vars = self.entity.temp_variables
        all_vars = self.entity.variables
        local_var_names = set(temp_vars.keys()) - set(all_vars.keys())
        num_locals = len(local_var_names)
        if config.debug and num_locals:
            local_vars = [v for k, v in temp_vars.iteritems()
                          if k in local_var_names and
                             isinstance(v, np.ndarray)]
            max_vars = max(max_vars, num_locals)
            temp_mem = sum(v.nbytes for v in local_vars)
            avgsize = sum(v.dtype.itemsize for v in local_vars) / num_locals
            print(("purging {} variables (max {}), will free {} of memory "
                  "(avg field size: {} b)".format(num_locals, max_vars,
                                                  utils.size2str(temp_mem),
                                                  avgsize)))

        for var in local_var_names:
            del temp_vars[var]
Example #18
def test_update_and_caching():
    N = 1000
    K = 100
    g = nx.random_graphs.powerlaw_cluster_graph(N, 3, 0.5)
    with timed('TFGraph and GraphIsing'):
        tfg = TFGraph(g)
        gis = GraphIsing([tfg] * K, N, N * 4)
        s0 = gis.initial_spins(-1.0)

    @tf.function
    def repeat(iters, data):
        for i in range(iters):
            data = gis.update(data, 0.5)
        return data

    with timed('single #1'):
        s = repeat(tf.constant(1), s0)
    with timed('single #2'):
        s = repeat(tf.constant(1), s0)
    with timed('repeated(10) #1'):
        s = repeat(tf.constant(10), s0)
    with timed('repeated(10) #2'):
        s = repeat(tf.constant(10), s0)
    #print([cf.structured_input_signature for cf in repeat._list_all_concrete_functions_for_serialization()])
    assert len(repeat._list_all_concrete_functions_for_serialization()) == 1

    gis.set_graphs([tfg] * K)
    with timed('single #3'):
        s = repeat(tf.constant(1), -s0)
    assert len(repeat._list_all_concrete_functions_for_serialization()) == 1

    gis.log_metrics()
Example #19
def test_create():
    N = 10
    K = 10
    g = nx.random_graphs.powerlaw_cluster_graph(N, 3, 0.5)
    # test exact sizes
    with timed('TFGraph'):
        tfg = TFGraph(g, N, N * 4)
    with timed('GraphIsing'):
        gis = GraphIsing(K, N, N * 4)
    with timed('set_graphs'):
        gis.set_graphs([tfg] * K)
    with timed('GraphIsing (with graphs)'):
        gis2 = GraphIsing([tfg] * K, N, N * 4)

    # a smaller graph can be set on gis
    with timed('set_graphs with smaller TFGraph'):
        g2 = nx.random_graphs.powerlaw_cluster_graph(N // 2, 3, 0.5)  # node count must be an int
        tfg2 = TFGraph(g2)
        gis.set_graphs([tfg2] * (2 * K // 3))
    assert (gis.v_node_masks.numpy()[0, :g2.order()] == True).all()
    assert (gis.v_node_masks.numpy()[0, g2.order():] == False).all()

    # gis with auto sizes
    with timed('auto sized TFGraph and GraphIsing'):
        g3 = nx.random_graphs.powerlaw_cluster_graph(N, 3, 0.5)
        tfg3 = TFGraph(g3)
        gis3 = GraphIsing([tfg3] * K)
        gis.set_graphs([tfg2] * K)
Example #20
def report_file_generate(self, report_id):
    from report import Report
    from memdb.report_cache import ReportCache, ReportTask

    report_cache = ReportCache()

    aggregated = report_cache.get_report_aggregated(report_id)
    if not aggregated:
        aggregated = get_aggregation(report_id)
        aggregated = ReportCache.unpack_aggregated(ReportCache.pack_aggregated(aggregated))
    report_generator = Report.get_report(report_id.report_type)
    with timed("rendering for %s" % report_id):
        data = report_generator.render(aggregated, report_id)
    report_cache.set_report(report_id, data, report_generator.report_cache_time)
    ReportTask().remove(report_id)
Example #21
def user_tiles(adapter, count=5000):

    query = timed(adapter.query_tile_time_percent)

    time_sum = 0
    with open('./tiles.txt') as f:
        queries = (tuple(int(x) for x in line.split('/')[1:5]) for line in f)
        for zoom, resolution, x, y in itertools.islice(queries, count):
        
            time_v, count_v = query(x, y, zoom, resolution, 0.0, 1.0)
            time_sum += time_v.microseconds

            time.sleep(DELAY)
        
        average = time_sum / count / 1.e6
        print 'User queries: average time {}'.format(average)
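Examples #21, #26, and #29 wrap a method once (query = timed(adapter.query_tile_time_percent)) and then unpack time_v, count_v from every call, reading time_v.microseconds afterwards. That implies a different variant: a wrapper that returns a datetime.timedelta alongside the wrapped function's own return value. A sketch consistent with those call sites (again an assumption, since the wrapper is not shown):

import datetime
import functools

def timed(func):
    # Wrap func so that each call returns (elapsed_timedelta, original_result).
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = datetime.datetime.now()
        result = func(*args, **kwargs)
        return datetime.datetime.now() - start, result
    return wrapper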
Example #22
    def get_tenant_usage(self, tenant_id, meter_name, start, end, limit=None):
        """ Queries ceilometer for all the entries in a given range,
            for a given meter, from this tenant."""

        query = [self.filter('timestamp', 'ge', start), self.filter('timestamp', 'lt', end)]

        if tenant_id:
            query.append(self.filter('project_id', 'eq', tenant_id))

        if meter_name:
            query.append(self.filter('meter', 'eq', meter_name))

        with timed('fetch global usage for meter %s' % meter_name):
            result = openstack.client_ceilometer.new_samples.list(q=query, limit=limit)
            log.debug("Get usage for tenant: {} and meter_name {} ({} - {}). Number records: {}",
                      tenant_id, meter_name, start, end, len(result))
            return result
Example #23
def report_file_generate(self, report_id):
    from report import Report
    from memdb.report_cache import ReportCache, ReportTask

    report_cache = ReportCache()

    aggregated = report_cache.get_report_aggregated(report_id)
    if not aggregated:
        aggregated = get_aggregation(report_id)
        aggregated = ReportCache.unpack_aggregated(
            ReportCache.pack_aggregated(aggregated))
    report_generator = Report.get_report(report_id.report_type)
    with timed("rendering for %s" % report_id):
        data = report_generator.render(aggregated, report_id)
    report_cache.set_report(report_id, data,
                            report_generator.report_cache_time)
    ReportTask().remove(report_id)
Example #24
    def filter_and_group(usage):
        usage_by_resource = defaultdict(list)
        with timed("filter and group by resource"):
            trust_sources = set(conf.fitter.trust_sources)
            for u in usage:
                # the user can make their own samples, including those
                # that would collide with what we care about for
                # billing.
                # if we have a list of trust sources configured, then
                # discard everything not matching.
                if trust_sources and u.source not in trust_sources:
                    logbook.warning('ignoring untrusted usage sample from source `{}`', u.source)
                    continue

                resource_id = u.resource_id
                usage_by_resource[resource_id].append(u)
        return usage_by_resource
Example #25
    def filter_and_group(usage):
        usage_by_resource = defaultdict(list)
        with timed("filter and group by resource"):
            trust_sources = set(conf.fitter.trust_sources)
            for u in usage:
                # the user can make their own samples, including those
                # that would collide with what we care about for
                # billing.
                # if we have a list of trust sources configured, then
                # discard everything not matching.
                if trust_sources and u.source not in trust_sources:
                    logbook.warning(
                        'ignoring untrusted usage sample from source `{}`',
                        u.source)
                    continue

                resource_id = u.resource_id
                usage_by_resource[resource_id].append(u)
        return usage_by_resource
Example #26
def region_time(adapter, latlon_mag=160, latlon_step=160, time_steps=5, zoom=4):
    
    lat0s = lat1s = lon0s = lon1s = [float(x) / 180. for x in range(-latlon_mag, latlon_mag + latlon_step, latlon_step)]
    starts = ends = [float(x) / time_steps for x in range(time_steps)]
    
    query = timed(adapter.query_region_latlon_time_percent)
    
    count = 0
    time_sum = 0.
    for lat0, lat1, lon0, lon1, start, end in itertools.product(lat0s, lat1s, lon0s, lon1s, starts, ends):
        if lat0 >= lat1 or lon0 >= lon1 or start >= end: continue
                    
        time_v, count_v = query(lat0, lon0, lat1, lon1, zoom, start, end)
        count += 1
        time_sum += time_v.microseconds
    
        time.sleep(DELAY)

    print 'For regions, average time {}'.format(time_sum / count / 1.e6)
Example #27
    def get_tenant_usage(self, tenant_id, meter_name, start, end, limit=None):
        """ Queries ceilometer for all the entries in a given range,
            for a given meter, from this tenant."""

        query = [
            self.filter('timestamp', 'ge', start),
            self.filter('timestamp', 'lt', end)
        ]

        if tenant_id:
            query.append(self.filter('project_id', 'eq', tenant_id))

        if meter_name:
            query.append(self.filter('meter', 'eq', meter_name))

        with timed('fetch global usage for meter %s' % meter_name):
            result = openstack.client_ceilometer.new_samples.list(q=query,
                                                                  limit=limit)
            log.debug(
                "Get usage for tenant: {} and meter_name {} ({} - {}). Number records: {}",
                tenant_id, meter_name, start, end, len(result))
            return result
Example #28
 def train(self, reg=.9, method='L-BFGS-B', disp=True, maxiter=100):
     """
     Requires self.obj(W) and self.grad(W) to be implemented, for use with
     scipy optimization.
     """
     print '[START] SML/SGD Training\n\nTR/VAL/TE SIZES: %s\n' % self.crf.Ns
     obj, grad = self.regularize(reg) if reg > 0 else (self.obj, self.grad)
     self.val_loss, self.Ws_val = [], []
     def val(W):
         print 'current W - obj: %s, norm: %s' % (obj(W), np.linalg.norm(W))
         Ws = self.crf.split_W(W)
         with timed('Validation (MAP predict)', skip=''):
             loss = self.ev(self.crf.Y_v, [self.crf.MAP(x, Ws) for x in self.crf.X_v])
         print '\tVAL LOSS: %s\n' % self.ev.get_names(loss)
         self.val_loss.append(loss)
         self.Ws_val.append(np.array(W))
     with timed('Scipy Opt: %s' % method, self):
         try:
             self.opt = minimize(obj, self.W_opt, method=method, jac=grad, callback=val,
                 options={'maxiter': maxiter, 'disp': disp})
             self.W_opt = self.opt.x
         except KeyboardInterrupt:
             print '\nINFO - Manually exited Scipy training'
     return self.W_opt, self.val_loss, self.Ws_val
Example #29
def tile_time_resolution(adapter, time_steps=5, zoom=4, tile_samples=20, resolution=8):

    random.seed(1)
    all_coords = list(itertools.product(range(0, 2**zoom), range(0, 2**zoom)))
    coords = random.sample(all_coords, tile_samples)

    starts = ends = [float(x) / time_steps for x in range(time_steps)]

    query = timed(adapter.query_tile_time_percent)

    count = 0
    time_sum = 0
    for start, end, (x, y) in itertools.product(starts, ends, coords):
        if start >= end: continue
    
        time_v, count_v = query(x, y, zoom, resolution, start, end)
        count += 1
        time_sum += time_v.microseconds

        time.sleep(DELAY)
    
    average = time_sum / count / 1.e6
    print 'At resolution {} average time {}'.format(resolution, average)
Example #30
File: run.py Project: benpastel/m5
def validate_on_end():
    X, y = load_data()
    days, ids, feats = X.shape
    assert y.shape == (days, ids)
    assert ids == ALL_IDS
    assert days == DATA_DAYS - SKIP_DAYS

    valid_X = X[-VALID_DAYS:].reshape(-1, feats)
    valid_y = y[-VALID_DAYS:].flatten()
    train_X = X[:-VALID_DAYS].reshape(-1, feats)
    train_y = y[:-VALID_DAYS].flatten()
    del X
    del y
    gc.collect()

    with timed(f'training lightgbm with X.shape={train_X.shape}'):
        model = lgb.LGBMRegressor(n_estimators=100)
        model.fit(train_X, train_y)

    print('train error:')
    valid_stats(model.predict(train_X), train_y, should_print=True)

    print('valid error:')
    valid_stats(model.predict(valid_X), valid_y, should_print=True)
Example #31
import utils

from contextlib import closing

from django.db import connection
from django.utils import timezone


def sql_simple_insert(n_records):
    with closing(connection.cursor()) as cursor:
        for i in xrange(0, n_records):
            cursor.execute(
                'INSERT INTO app_testmodel (field_1, field_2, field_3)'
                'VALUES (%s, %s, %s)',
                (i, str(i), timezone.now()),
            )


if __name__ == '__main__':
    utils.timed(sql_simple_insert)
Example #32
import utils

from contextlib import closing

from django.db import connection
from django.utils import timezone


def sql_batch_insert(n_records):
    sql = 'INSERT INTO app_testmodel (field_1, field_2, field_3) VALUES {}'.format(
        ', '.join(['(%s, %s, %s)'] * n_records),
    )
    params = []
    for i in xrange(0, n_records):
        params.extend([i, str(i), timezone.now()])

    with closing(connection.cursor()) as cursor:
        cursor.execute(sql, params)


if __name__ == '__main__':
    utils.timed(sql_batch_insert)
Example #33
import utils

from contextlib import closing
import csv
from cStringIO import StringIO

from django.db import connection
from django.utils import timezone


def copy_from(n_records):
    stream = StringIO()
    writer = csv.writer(stream, delimiter='\t')

    for i in xrange(0, n_records):
        writer.writerow([i, str(i), timezone.now().isoformat()])

    stream.seek(0)

    with closing(connection.cursor()) as cursor:
        cursor.copy_from(
            file=stream,
            table='app_testmodel',
            sep='\t',
            columns=('field_1', 'field_2', 'field_3'),
        )


if __name__ == '__main__':
    utils.timed(copy_from)
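Examples #31-#33 benchmark three insert strategies (row-by-row INSERT, one batched INSERT, and PostgreSQL COPY) by handing the function to utils.timed without arguments, so that utils module presumably supplies a default record count and prints the measurement. A plausible sketch, with the n_records default being a pure assumption:

import time

def timed(func, n_records=10000):
    # Run one insert strategy against a fixed number of records and report.
    start = time.time()
    func(n_records)
    print('{}: {:.2f}s for {} records'.format(
        func.__name__, time.time() - start, n_records))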
Example #34
 def _post_build_process(self, model):
     with timed("\nPost-processing model..."):
         model = toolz.pipe(model, *self.config.post_build_processors)
     return model
Example #35
 def test(self, x, y):
     with timed('predict'):
         y_pred = model.predict(x)
     print('========== accuracy_score = {}'.format(accuracy_score(y, y_pred)))
     print('========== confusion_matrix:')
     print(confusion_matrix(y, y_pred))
Example #36
    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iterNodes(input_root.entities):
        table_fields = get_fields(table)
        table_fields = [(fname, ftype) for fname, ftype in table_fields
                        if fname not in todrop]
        size = (len(table) * table.dtype.itemsize) / 1024.0 / 1024.0
        #noinspection PyProtectedMember
        print(" * copying table %s (%.2f Mb) ..." % (table._v_name, size),
              end=' ')
        copy_table(table, output_entities, table_fields)
        print("done.")

    input_file.close()
    output_file.close()


if __name__ == '__main__':
    import sys
    import platform

    print("LIAM HDF5 drop fields %s using Python %s (%s)\n" %
          (__version__, platform.python_version(), platform.architecture()[0]))

    args = sys.argv
    if len(args) < 4:
        print("Usage: %s inputpath outputpath field1 [field2 ...]" % args[0])
        sys.exit()

    timed(dropfields, args[1], args[2], args[3:])
Example #37
 def _augment(self, dataset):
     with timed("\nAugmenting dataset..."):
         augmented = Transform(dataset, self.config.augmentations)
     return augmented
Example #38
    def run(self, run_console=False):
        start_time = time.time()

        h5in, h5out, globals_data = timed(self.data_source.run,
                                          self.globals_def,
                                          entity_registry,
                                          self.init_period)

        if config.autodump or config.autodiff:
            if config.autodump:
                fname, _ = config.autodump
                mode = 'w'
            else:  # config.autodiff
                fname, _ = config.autodiff
                mode = 'r'
            fpath = os.path.join(config.output_directory, fname)
            h5_autodump = tables.open_file(fpath, mode=mode)
            config.autodump_file = h5_autodump
        else:
            h5_autodump = None

#        input_dataset = self.data_source.run(self.globals_def,
#                                             entity_registry)
#        output_dataset = self.data_sink.prepare(self.globals_def,
#                                                entity_registry)
#        output_dataset.copy(input_dataset, self.init_period - 1)
#        for entity in input_dataset:
#            indexed_array = buildArrayForPeriod(entity)

        # tell numpy we do not want warnings for x/0 and 0/0
        np.seterr(divide='ignore', invalid='ignore')

        process_time = defaultdict(float)
        period_objects = {}
        eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

        def simulate_period(period_idx, period, periods, processes, entities,
                            init=False):
            period_start_time = time.time()

            # set current period
            eval_ctx.period = period

            if config.log_level in ("procedures", "processes"):
                print()
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")
            if init and config.log_level in ("procedures", "processes"):
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                if config.log_level in ("procedures", "processes"):
                    print("- loading input data")
                    for entity in entities:
                        print("  *", entity.name, "...", end=' ')
                        timed(entity.load_period_data, period)
                        print("    -> %d individuals" % len(entity.array))
                else:
                    for entity in entities:
                        entity.load_period_data(period)
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                # build context for this period:
                const_dict = {'period_idx': period_idx + 1,
                              'periods': periods,
                              'periodicity': time_period[self.time_scale] * (1 - 2 * (self.retro)),
                              'longitudinal': self.longitudinal,
                              'format_date': self.time_scale,
                              'pension': None,
                              '__simulation__': self,
                              'period': period,
                              'nan': float('nan'),
                              '__globals__': globals_data}
                assert(periods[period_idx + 1] == period)

                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):

                    process, periodicity, start = process_def
                    if config.log_level in ("procedures", "processes"):
                        print("- %d/%d" % (p_num, num_processes), process.name,
                              end=' ')
                        print("...", end=' ')
                    # TODO: change that
                    if isinstance(periodicity, int):
                        if period_idx % periodicity == 0:
                            elapsed, _ = gettime(process.run_guarded, self,
                                                 const_dict)
                        else:
                            elapsed = 0
                            print("skipped (periodicity)")
                    else:
                        assert periodicity in time_period
                        periodicity_process = time_period[periodicity]
                        periodicity_simul = time_period[self.time_scale]
                        month_idx = period % 100
                        # first condition: run a process with start == 12
                        # each year, even if years are formatted yyyy01;
                        # adjust start if periodicity_simul is not monthly
                        start = int(start / periodicity_simul - 0.01) * periodicity_simul + 1

                        if (periodicity_process <= periodicity_simul and self.time_scale != 'year0') or (
                                month_idx % periodicity_process == start % periodicity_process):
                            const_dict['periodicity'] = periodicity_process * (1 - 2 * (self.retro))
                            elapsed, _ = gettime(process.run_guarded, self, const_dict)
                        else:
                            elapsed = 0
                            # only report the skip when the process did not run
                            if config.log_level in ("procedures", "processes"):
                                print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    if config.log_level in ("procedures", "processes"):
                        if config.show_timings:
                            print("done (%s elapsed)." % time2str(elapsed))
                        else:
                            print("done.")
                    self.start_console(eval_ctx)

            # update longitudinal
            person = [x for x in entities if x.name == 'person'][0]
            # maybe we have a get_entity or something nicer than that. TODO: check
            id = person.array.columns['id']

            for varname in ['sali', 'workstate']:
                var = person.array.columns[varname]
                if init:
                    fpath = self.data_source.input_path
                    input_file = HDFStore(fpath, mode="r")
                    if 'longitudinal' in input_file.root:
                        input_longitudinal = input_file.root.longitudinal
                        if varname in input_longitudinal:
                            self.longitudinal[varname] = input_file['/longitudinal/' + varname]
                            if period not in self.longitudinal[varname].columns:
                                table = DataFrame({'id': id, period: var})
                                self.longitudinal[varname] = self.longitudinal[varname].merge(
                                    table, on='id', how='outer')
                        else:
                            # when one variable is not in the input_file
                            self.longitudinal[varname] = DataFrame({'id': id, period: var})
                    else:
                        # when there is no longitudinal in the dataset
                        self.longitudinal[varname] = DataFrame({'id': id, period: var})
                else:
                    table = DataFrame({'id': id, period: var})
                    if period in self.longitudinal[varname]:
                        import pdb
                        pdb.set_trace()
                    self.longitudinal[varname] = self.longitudinal[varname].merge(table, on='id', how='outer')

            if config.log_level in ("procedures", "processes"):
                print("- storing period data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.store_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.store_period_data(period)

#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)
            period_elapsed_time = time.time() - period_start_time
            if config.log_level in ("procedures", "processes"):
                print("period %d" % period, end=' ')
            print("done", end=' ')
            if config.show_timings:
                print("(%s elapsed)" % time2str(period_elapsed_time), end="")
                if init:
                    print(".")
                else:
                    main_elapsed_time = time.time() - main_start_time
                    periods_done = period_idx + 1
                    remaining_periods = self.periods - periods_done
                    avg_time = main_elapsed_time / periods_done
                    # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                    remaining_time = avg_time * remaining_periods
                    print(" - estimated remaining time: %s."
                          % time2str(remaining_time))
            else:
                print()

        print("""
=====================
 starting simulation
=====================""")
        try:
            assert(self.time_scale in time_period)
            month_periodicity = time_period[self.time_scale]
            time_direction = 1 - 2 * (self.retro)
            time_step = month_periodicity * time_direction

            periods = [
                self.init_period + int(t / 12) * 100 + t % 12
                for t in range(0, (self.periods + 1) * time_step, time_step)
                ]
            if self.time_scale == 'year0':
                periods = [self.init_period + t for t in range(0, (self.periods + 1))]
            print("simulated periods are going to be: ", periods)

            init_start_time = time.time()
            simulate_period(0, self.init_period, [None, periods[0]], self.init_processes, self.entities, init=True)

            time_init = time.time() - init_start_time
            main_start_time = time.time()

            for period_idx, period in enumerate(periods[1:]):
                period_start_time = time.time()
                simulate_period(period_idx, period, periods,
                                self.processes, self.entities)

#                 if self.legislation:
#                     if not self.legislation['ex_post']:
#
#                         elapsed, _ = gettime(liam2of.main,period)
#                         process_time['liam2of'] += elapsed
#                         elapsed, _ = gettime(of_on_liam.main,self.legislation['annee'],[period])
#                         process_time['legislation'] += elapsed
#                         elapsed, _ = gettime(merge_leg.merge_h5,self.data_source.output_path,
#                                              "C:/Til/output/"+"simul_leg.h5",period)
#                         process_time['merge_leg'] += elapsed

                time_elapsed = time.time() - period_start_time
                print("period %d done" % period, end=' ')
                if config.show_timings:
                    print("(%s elapsed)." % time2str(time_elapsed))
                else:
                    print()

            total_objects = sum(period_objects[period] for period in periods)
            total_time = time.time() - main_start_time

#             if self.legislation:
#                 if self.legislation['ex_post']:
#
#                     elapsed, _ = gettime(liam2of.main)
#                     process_time['liam2of'] += elapsed
#                     elapsed, _ = gettime(of_on_liam.main,self.legislation['annee'])
#                     process_time['legislation'] += elapsed
#                     # TODO: faire un programme a part, so far ca ne marche pas pour l'ensemble
#                     # adapter n'est pas si facile, comme on veut economiser une table,
#                     # on ne peut pas faire de append directement parce qu on met 2010 apres 2011
#                     # a un moment dans le calcul
#                     elapsed, _ = gettime(merge_leg.merge_h5,self.data_source.output_path,
#                                          "C:/Til/output/"+"simul_leg.h5",None)
#                     process_time['merge_leg'] += elapsed

            if self.final_stat:
                elapsed, _ = gettime(start, period)
                process_time['Stat'] += elapsed

            total_time = time.time() - main_start_time
            time_year = 0
            if len(periods) > 1:
                nb_year_approx = periods[-1] / 100 - periods[1] / 100
                if nb_year_approx > 0:
                    time_year = total_time / nb_year_approx

            try:
                ind_per_sec = str(int(total_objects / total_time))
            except ZeroDivisionError:
                ind_per_sec = 'inf'
            print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %d individuals on average
 * %s individuals/s/period on average

 * %s for init_process
 * %s time/period on average
 * %s time/year on average
==========================================
""" % (
                time2str(time.time() - start_time),
                total_objects / self.periods,
                ind_per_sec,
                time2str(time_init),
                time2str(total_time / self.periods),
                time2str(time_year))
            )

            show_top_processes(process_time, 10)
            # if config.debug:
            #     show_top_expr()

            if run_console:
                console_ctx = eval_ctx.clone(entity_name=self.default_entity)
                c = console.Console(console_ctx)
                c.run()

        finally:
            if h5in is not None:
                h5in.close()
            h5out.close()
            if h5_autodump is not None:
                h5_autodump.close()
Example #40
Ejemplo n.º 39
0
        def simulate_period(period_idx, period, periods, processes, entities,
                            init=False):
            period_start_time = time.time()

            # set current period
            eval_ctx.period = period

            if config.log_level in ("procedures", "processes"):
                print()
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")
            if init and config.log_level in ("procedures", "processes"):
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                if config.log_level in ("procedures", "processes"):
                    print("- loading input data")
                    for entity in entities:
                        print("  *", entity.name, "...", end=' ')
                        timed(entity.load_period_data, period)
                        print("    -> %d individuals" % len(entity.array))
                else:
                    for entity in entities:
                        entity.load_period_data(period)
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                # build context for this period:
                const_dict = {'period_idx': period_idx + 1,
                              'periods': periods,
                              'periodicity': time_period[self.time_scale] * (1 - 2 * (self.retro)),
                              'longitudinal': self.longitudinal,
                              'format_date': self.time_scale,
                              'pension': None,
                              '__simulation__': self,
                              'period': period,
                              'nan': float('nan'),
                              '__globals__': globals_data}
                assert(periods[period_idx + 1] == period)

                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):

                    process, periodicity, start = process_def
                    if config.log_level in ("procedures", "processes"):
                        print("- %d/%d" % (p_num, num_processes), process.name,
                              end=' ')
                        print("...", end=' ')
                    # TDOD: change that
                    if isinstance(periodicity, int):
                        if period_idx % periodicity == 0:
                            elapsed, _ = gettime(process.run_guarded, self,
                                                 const_dict)
                        else:
                            elapsed = 0
                            print("skipped (periodicity)")
                    else:
                        assert periodicity in time_period
                        periodicity_process = time_period[periodicity]
                        periodicity_simul = time_period[self.time_scale]
                        month_idx = period % 100
                        # first condition, to run a process with start == 12
                        # each year even if year are yyyy01
                        # modify start if periodicity_simul is not month
                        start = int(start / periodicity_simul - 0.01) * periodicity_simul + 1

                        if (periodicity_process <= periodicity_simul and self.time_scale != 'year0') or (
                                month_idx % periodicity_process == start % periodicity_process):

                            const_dict['periodicity'] = periodicity_process * (1 - 2 * (self.retro))
                            elapsed, _ = gettime(process.run_guarded, self, const_dict)
                        else:
                            elapsed = 0

                        if config.log_level in ("procedures", "processes"):
                            print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    if config.log_level in ("procedures", "processes"):
                        if config.show_timings:
                            print("done (%s elapsed)." % time2str(elapsed))
                        else:
                            print("done.")
                    self.start_console(eval_ctx)

            # update longitudinal
            person = [x for x in entities if x.name == 'person'][0]
            # maybe we have a get_entity or anything more nice than that #TODO: check
            id = person.array.columns['id']

            for varname in ['sali', 'workstate']:
                var = person.array.columns[varname]
                if init:
                    fpath = self.data_source.input_path
                    input_file = HDFStore(fpath, mode="r")
                    if 'longitudinal' in input_file.root:
                        input_longitudinal = input_file.root.longitudinal
                        if varname in input_longitudinal:
                            self.longitudinal[varname] = input_file['/longitudinal/' + varname]
                            if period not in self.longitudinal[varname].columns:
                                table = DataFrame({'id': id, period: var})
                                self.longitudinal[varname] = self.longitudinal[varname].merge(
                                    table, on='id', how='outer')
                        else:
                            # when one variable is not in the input_file
                            self.longitudinal[varname] = DataFrame({'id': id, period: var})
                    else:
                        # when there is no longitudinal in the dataset
                        self.longitudinal[varname] = DataFrame({'id': id, period: var})
                else:
                    table = DataFrame({'id': id, period: var})
                    if period in self.longitudinal[varname]:
                        import pdb
                        pdb.set_trace()
                    self.longitudinal[varname] = self.longitudinal[varname].merge(table, on='id', how='outer')

            if config.log_level in ("procedures", "processes"):
                print("- storing period data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.store_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.store_period_data(period)

#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)
            period_elapsed_time = time.time() - period_start_time
            if config.log_level in ("procedures", "processes"):
                print("period %d" % period, end=' ')
            print("done", end=' ')
            if config.show_timings:
                print("(%s elapsed)" % time2str(period_elapsed_time), end="")
                if init:
                    print(".")
                else:
                    main_elapsed_time = time.time() - main_start_time
                    periods_done = period_idx + 1
                    remaining_periods = self.periods - periods_done
                    avg_time = main_elapsed_time / periods_done
                    # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                    remaining_time = avg_time * remaining_periods
                    print(" - estimated remaining time: %s."
                          % time2str(remaining_time))
            else:
                print()
Ejemplo n.º 40
0
import utils

from django.utils import timezone

from app import models


def orm_bulk_create(n_records):
    instances = [
        models.TestModel(
            field_1=i,
            field_2=str(i),
            field_3=timezone.now(),
        )
        for i in xrange(0, n_records)
    ]

    models.TestModel.objects.bulk_create(instances)


if __name__ == '__main__':
    utils.timed(orm_bulk_create)
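A note on the snippet above: Django's bulk_create also accepts a batch_size argument, so for very large n_records the single INSERT can be split into bounded chunks. A hypothetical variation, not part of the original benchmark:

# same benchmark, but chunked into statements of at most 1000 rows each
models.TestModel.objects.bulk_create(instances, batch_size=1000)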
Ejemplo n.º 41
0
    def load(self):
        return timed(self.data_source.load, self.globals_def, self.entities_map)
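Several examples in this list call timed(func, *args) and use its return value directly, which implies a wrapper roughly like the following. This is a minimal sketch only; the real utils.timed differs between the projects shown here, and the print format is assumed:

import time

def timed(func, *args, **kwargs):
    # run func, report the wall-clock time it took, and pass its result through
    start = time.time()
    result = func(*args, **kwargs)
    print('took %.2f s' % (time.time() - start))
    return result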
Ejemplo n.º 42
0
        def simulate_period(period_idx, period, processes, entities,
                            init=False):
            period_start_time = time.time()

            # set current period
            eval_ctx.period = period

            if config.log_level in ("functions", "processes"):
                print()
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")
            if init and config.log_level in ("functions", "processes"):
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                if config.log_level in ("functions", "processes"):
                    print("- loading input data")
                    for entity in entities:
                        print("  *", entity.name, "...", end=' ')
                        timed(entity.load_period_data, period)
                        print("    -> %d individuals" % len(entity.array))
                else:
                    for entity in entities:
                        entity.load_period_data(period)
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity = process_def

                    # set current entity
                    eval_ctx.entity_name = process.entity.name

                    if config.log_level in ("functions", "processes"):
                        print("- %d/%d" % (p_num, num_processes), process.name,
                              end=' ')
                        print("...", end=' ')
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, eval_ctx)
                    else:
                        elapsed = 0
                        if config.log_level in ("functions", "processes"):
                            print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    if config.log_level in ("functions", "processes"):
                        if config.show_timings:
                            print("done (%s elapsed)." % time2str(elapsed))
                        else:
                            print("done.")
                    self.start_console(eval_ctx)

            if config.log_level in ("functions", "processes"):
                print("- storing period data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.store_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.store_period_data(period)
#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)
            period_elapsed_time = time.time() - period_start_time
            if config.log_level in ("functions", "processes"):
                print("period %d" % period, end=' ')
            print("done", end=' ')
            if config.show_timings:
                print("(%s elapsed)" % time2str(period_elapsed_time), end="")
                if init:
                    print(".")
                else:
                    main_elapsed_time = time.time() - main_start_time
                    periods_done = period_idx + 1
                    remaining_periods = self.periods - periods_done
                    avg_time = main_elapsed_time / periods_done
                    # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                    remaining_time = avg_time * remaining_periods
                    print(" - estimated remaining time: %s."
                          % time2str(remaining_time))
            else:
                print()
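The pattern elapsed, _ = gettime(process.run_guarded, eval_ctx) above implies that gettime returns the elapsed time first, followed by the call's own result. A minimal sketch under that assumption:

import time

def gettime(func, *args, **kwargs):
    # time a single call and return (elapsed_seconds, result)
    start = time.time()
    result = func(*args, **kwargs)
    return time.time() - start, result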
Ejemplo n.º 43
0
    def run_single(self, run_console=False, run_num=None):
        start_time = time.time()

        input_dataset = timed(self.data_source.load,
                              self.globals_def,
                              self.entities_map)

        globals_data = input_dataset.get('globals')
        timed(self.data_sink.prepare, self.globals_def, self.entities_map,
              input_dataset, self.start_period - 1)

        print(" * building arrays for first simulated period")
        for ent_name, entity in self.entities_map.iteritems():
            print("    -", ent_name, "...", end=' ')
            # TODO: this whole process of merging all periods is very
            # opinionated and does not allow individuals to die/disappear
            # before the simulation starts. We could not, for example,
            # take the output of one of our simulations and re-simulate
            # only some years in the middle, because the dead would be
            # brought back to life. In short, it should be optional.
            timed(entity.build_period_array, self.start_period - 1)
        print("done.")

        if config.autodump or config.autodiff:
            if config.autodump:
                fname, _ = config.autodump
                mode = 'w'
            else:  # config.autodiff
                fname, _ = config.autodiff
                mode = 'r'
            fpath = os.path.join(config.output_directory, fname)
            h5_autodump = tables.open_file(fpath, mode=mode)
            config.autodump_file = h5_autodump
        else:
            h5_autodump = None

        # tell numpy we do not want warnings for x/0 and 0/0
        np.seterr(divide='ignore', invalid='ignore')

        process_time = defaultdict(float)
        period_objects = {}
        eval_ctx = EvaluationContext(self, self.entities_map, globals_data)

        def simulate_period(period_idx, period, processes, entities,
                            init=False):
            period_start_time = time.time()

            # set current period
            eval_ctx.period = period

            if config.log_level in ("functions", "processes"):
                print()
            print("period", period,
                  end=" " if config.log_level == "periods" else "\n")
            if init and config.log_level in ("functions", "processes"):
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                if config.log_level in ("functions", "processes"):
                    print("- loading input data")
                    for entity in entities:
                        print("  *", entity.name, "...", end=' ')
                        timed(entity.load_period_data, period)
                        print("    -> %d individuals" % len(entity.array))
                else:
                    for entity in entities:
                        entity.load_period_data(period)
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity = process_def

                    # set current entity
                    eval_ctx.entity_name = process.entity.name

                    if config.log_level in ("functions", "processes"):
                        print("- %d/%d" % (p_num, num_processes), process.name,
                              end=' ')
                        print("...", end=' ')
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, eval_ctx)
                    else:
                        elapsed = 0
                        if config.log_level in ("functions", "processes"):
                            print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    if config.log_level in ("functions", "processes"):
                        if config.show_timings:
                            print("done (%s elapsed)." % time2str(elapsed))
                        else:
                            print("done.")
                    self.start_console(eval_ctx)

            if config.log_level in ("functions", "processes"):
                print("- storing period data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.store_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            else:
                for entity in entities:
                    entity.store_period_data(period)
#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)
            period_elapsed_time = time.time() - period_start_time
            if config.log_level in ("functions", "processes"):
                print("period %d" % period, end=' ')
            print("done", end=' ')
            if config.show_timings:
                print("(%s elapsed)" % time2str(period_elapsed_time), end="")
                if init:
                    print(".")
                else:
                    main_elapsed_time = time.time() - main_start_time
                    periods_done = period_idx + 1
                    remaining_periods = self.periods - periods_done
                    avg_time = main_elapsed_time / periods_done
                    # future_time = period_elapsed_time * 0.4 + avg_time * 0.6
                    remaining_time = avg_time * remaining_periods
                    print(" - estimated remaining time: %s."
                          % time2str(remaining_time))
            else:
                print()

        print("""
=====================
 starting simulation
=====================""")
        try:
            simulate_period(0, self.start_period - 1, self.init_processes,
                            self.entities, init=True)
            main_start_time = time.time()
            periods = range(self.start_period,
                            self.start_period + self.periods)
            for period_idx, period in enumerate(periods):
                simulate_period(period_idx, period,
                                self.processes, self.entities)

            total_objects = sum(period_objects[period] for period in periods)
            avg_objects = str(total_objects // self.periods) \
                if self.periods else 'N/A'
            main_elapsed_time = time.time() - main_start_time
            ind_per_sec = str(int(total_objects / main_elapsed_time)) \
                if main_elapsed_time else 'inf'

            print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %s individuals on average
 * %s individuals/s/period on average
==========================================
""" % (time2str(time.time() - start_time), avg_objects, ind_per_sec))

            show_top_processes(process_time, 10)
#            if config.debug:
#                show_top_expr()

            if run_console:
                ent_name = self.default_entity
                if ent_name is None and len(eval_ctx.entities) == 1:
                    ent_name = eval_ctx.entities.keys()[0]
                # FIXME: fresh_data prevents the old (cloned) EvaluationContext
                # from being referenced from each EntityContext, which led to
                # period being fixed to the last period of the simulation. This
                # should be fixed in EvaluationContext.copy but the proper fix
                # breaks stuff (see the comments there)
                console_ctx = eval_ctx.clone(fresh_data=True,
                                             entity_name=ent_name)
                c = console.Console(console_ctx)
                c.run()

        finally:
            self.close()
            if h5_autodump is not None:
                h5_autodump.close()
            if self.minimal_output:
                output_path = self.data_sink.output_path
                dirname = os.path.dirname(output_path)
                try:
                    os.remove(output_path)
                    os.rmdir(dirname)
                except OSError:
                    print("WARNING: could not delete temporary output: %r"
                          % output_path)
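The remaining-time estimate printed above is a plain running average: the elapsed time so far divided by the number of completed periods, extrapolated over the periods still to run. Isolated into a hypothetical helper:

def estimated_remaining_time(main_elapsed_time, periods_done, remaining_periods):
    # average cost of one simulated period so far, times the periods left
    avg_time = main_elapsed_time / periods_done
    return avg_time * remaining_periods

# e.g. 120 s for 4 periods -> 30 s/period -> 180 s for the 6 remaining
assert estimated_remaining_time(120.0, 4, 6) == 180.0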
Ejemplo n.º 44
0
    def load(self):
        return timed(self.data_source.load, self.globals_def, entity_registry)
Ejemplo n.º 45
0
            elif ent_name in ent_names1:
                output_array = input1_array
            elif ent_name in ent_names2:
                output_array = input2_array
            else:
                raise Exception("this shouldn't have happened")
            output_table.append(output_array)
            output_table.flush()

        loop_wh_progress(merge_period, output_periods)
        print " done."

    input1_file.close()
    input2_file.close()
    output_file.close()


if __name__ == '__main__':
    import sys
    import platform

    print "LIAM HDF5 merge %s using Python %s (%s)\n" % \
          (__version__, platform.python_version(), platform.architecture()[0])

    args = sys.argv
    if len(args) < 4:
        print "Usage: %s inputpath1 inputpath2 outputpath" % args[0]
        sys.exit()

    timed(merge_h5, args[1], args[2], args[3])
Ejemplo n.º 46
0
    # copy globals
    if copy_globals:
        # noinspection PyProtectedMember
        input_file.root.globals._f_copy(output_file.root, recursive=True)

    output_entities = output_file.create_group("/", "entities", "Entities")
    for table in input_file.iter_nodes(input_file.root.entities):
        # noinspection PyProtectedMember
        print(table._v_name, "...")
        copy_table(table, output_entities, condition=condition)

    input_file.close()
    output_file.close()


if __name__ == '__main__':
    import sys
    import platform

    print("LIAM HDF5 filter %s using Python %s (%s)\n" %
          (__version__, platform.python_version(), platform.architecture()[0]))

    args = dict(enumerate(sys.argv))
    if len(args) < 4:
        print("""Usage: {} inputpath outputpath condition [copy_globals]
where condition is an expression
      copy_globals is True (default)|False""".format(args[0]))
        sys.exit()

    timed(filter_h5, args[1], args[2], args[3], eval(args.get(4, 'True')))
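Note that eval(args.get(4, 'True')) executes arbitrary text from the command line. A stricter parse that accepts only the two documented values could look like this (a hypothetical replacement, not part of the original script):

def parse_bool(text, default=True):
    # accept only the documented literals instead of eval-ing raw input
    if text is None:
        return default
    if text in ('True', 'False'):
        return text == 'True'
    raise SystemExit('copy_globals must be True or False, got %r' % text)

# timed(filter_h5, args[1], args[2], args[3], parse_bool(args.get(4)))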
Ejemplo n.º 47
0
    output_file = tables.openFile(output_path, mode="w")

    # copy globals
    input_file.root.globals._f_copy(output_file.root, recursive=True)

    output_entities = output_file.createGroup("/", "entities", "Entities")
    for table in input_file.iterNodes(input_file.root.entities):
        print table._v_name, "..."
        copyTable(table, output_entities, condition=condition)

    input_file.close()
    output_file.close()


if __name__ == "__main__":
    import sys
    import platform

    print "LIAM HDF5 filter %s using Python %s (%s)\n" % (
        __version__,
        platform.python_version(),
        platform.architecture()[0],
    )

    args = sys.argv
    if len(args) < 4:
        print "Usage: %s inputpath outputpath condition" % args[0]
        sys.exit()

    timed(filter_h5, args[1], args[2], args[3])
Ejemplo n.º 48
0
import utils

from contextlib import closing

from django.db import connection
from django.utils import timezone


def sql_simple_insert_executemany(n_records):
    with closing(connection.cursor()) as cursor:
        cursor.executemany(
            'INSERT INTO app_testmodel (field_1, field_2, field_3) '
            'VALUES (%s, %s, %s)',
            [(i, str(i), timezone.now()) for i in xrange(0, n_records)],
        )


if __name__ == '__main__':
    utils.timed(sql_simple_insert_executemany)
Ejemplo n.º 49
0
def main() -> None:
    with utils.timed():
        print(f"Part 1: {part1()}")

    with utils.timed():
        print(f"Part 2: {part2()}")
Ejemplo n.º 50
0
File: run.py Project: benpastel/m5
def load_data():
    if os.path.isfile(XY_CACHE):
        print('loading from cache')
        cached = np.load(XY_CACHE)
        return cached['X'], cached['y']

    with timed('loading data from csv...'):
        day_frame = pd.read_csv('data/calendar.csv')

        # feats:
        #   day of week
        #   day of month
        #   month
        #   event_1 (encoded in [0, 32])
        #   snap    (encoded in [0, 2**3])
        day_feats = np.zeros((ALL_DAYS, 5), dtype=np.uint8)
        assert len(day_frame) == ALL_DAYS
        day_feats[:, 0] = day_frame['wday'].values

        # parse the day part from YYYY-MM-DD
        day_feats[:, 1] = [int(date.split('-')[2]) for date in day_frame['date']]

        day_feats[:, 2] = day_frame['month'].values

        # for simplicity, ignore event_name_2
        # TODO try using it
        _, event_codes = np.unique(
            day_frame['event_name_1'].values.astype(str), return_inverse=True)
        day_feats[:, 3] = event_codes

        # for simplicity, dense-code the snap
        # TODO eventually match it properly based on geography
        day_feats[:, 4] = (day_frame['snap_CA'].values +
                           2 * day_frame['snap_TX'].values +
                           4 * day_frame['snap_WI'].values)

        assert np.min(day_feats[:, 0]) == 1
        assert np.max(day_feats[:, 0]) == 7
        assert np.min(day_feats[:, 1]) == 1
        assert np.max(day_feats[:, 1]) == 31
        assert np.min(day_feats[:, 2]) == 1
        assert np.max(day_feats[:, 2]) == 12
        assert np.min(day_feats[:, 3]) == 0
        assert np.max(day_feats[:, 3]) == 30
        assert np.min(day_feats[:, 4]) == 0
        assert np.max(day_feats[:, 4]) == 7

        sales_frame = pd.read_csv('data/sales_train_validation.csv')
        sales = load_sales()

        # y is (days, ids) after the first year
        y = sales[SKIP_DAYS:, :]

        uniques = {}
        ordinals = {}
        for col in ['item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']:
            u, inverse = np.unique(sales_frame[col], return_inverse=True)
            uniques[col] = u
            ordinals[col] = inverse

        target_days = DATA_DAYS - SKIP_DAYS
        feats = 5 + 4 + (5 * 2)
        X = np.zeros((target_days, feats, ALL_IDS), dtype=np.float32)

        # broadcast over all ids
        train_day_feats = day_feats[SKIP_DAYS:SKIP_DAYS + target_days, :]
        X[:, 0:5, :] = train_day_feats.reshape(target_days, 5, 1)

        # for dept_id, cat_id, store_id, state_id there are few enough values we can just
        # pass them in as ordinals directly.
        # broadcast over all days
        X[:, 5, :] = ordinals['dept_id']
        X[:, 6, :] = ordinals['cat_id']
        X[:, 7, :] = ordinals['store_id']
        X[:, 8, :] = ordinals['state_id']

        # for id and item_id, try a bunch of different embeddings.
        # for now, all sales aggregations must be validation-safe:
        # they only use info from t - VALID_DAYS and earlier.

        # start with:
        #   (1) mean, min, max, std, nonzero over previous year
        # TODO:
        #   try over different windows (month, week)
        #   try over different slices (sharing holiday; sharing day of week; etc.)

        # ID features
        for t in range(target_days):
            if (t % 100) == 0:
                print(f'{t}/{target_days}')

            d = SKIP_DAYS + t - VALID_DAYS

            assert d - 365 >= 0
            group = sales[d - 365:d]

            X[t, 9, :] = np.mean(group, axis=0)
            X[t, 10, :] = np.min(group, axis=0)
            X[t, 11, :] = np.max(group, axis=0)
            X[t, 12, :] = np.std(group, axis=0)
            X[t, 13, :] = np.count_nonzero(group, axis=0)

            for i in range(len(uniques['item_id'])):
                is_item = (ordinals['item_id'] == i)
                item_group = group[:, is_item]

                # print(f'{ordinals["item_id"].shape=}')
                # print(f'{is_item.shape=}, {np.count_nonzero(is_item)=}')
                # print(f'{group.shape=}, {item_group.shape=}, {np.mean(item_group, axis=0).shape=}, {X[t, 14, is_item].shape=}')

                X[t, 14, is_item] = np.mean(item_group, axis=0)
                X[t, 15, is_item] = np.min(item_group, axis=0)
                X[t, 16, is_item] = np.max(item_group, axis=0)
                X[t, 17, is_item] = np.std(item_group, axis=0)
                X[t, 18, is_item] = np.count_nonzero(item_group, axis=0)

    X = np.swapaxes(X, 1, 2)
    assert X.shape == (target_days, ALL_IDS, feats)

    with timed('saving...'):
        np.savez_compressed(XY_CACHE, X=X, y=y)
    return X, y
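The snap encoding above packs three binary flags into one integer: snap_CA in bit 0, snap_TX in bit 1 and snap_WI in bit 2, so all eight combinations fit in a single uint8 feature. A hypothetical decoder for sanity-checking the packed values:

def decode_snap(code):
    # inverse of snap_CA + 2 * snap_TX + 4 * snap_WI
    return bool(code & 1), bool(code & 2), bool(code & 4)

assert decode_snap(5) == (True, False, True)  # CA and WI active, TX not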
Ejemplo n.º 51
0
def merge_h5(input1_path, input2_path, output_path):
    input1_file = tables.open_file(input1_path)
    input2_file = tables.open_file(input2_path)
    output_file = tables.open_file(output_path, mode="w")

    input1root = input1_file.root
    input2root = input2_file.root

    merge_group(input1root, input2root, 'globals', output_file, 'PERIOD')
    merge_group(input1root, input2root, 'entities', output_file, 'period')

    input1_file.close()
    input2_file.close()
    output_file.close()


if __name__ == '__main__':
    import sys
    import platform

    print("LIAM HDF5 merge %s using Python %s (%s)\n" %
          (__version__, platform.python_version(), platform.architecture()[0]))

    args = sys.argv
    if len(args) < 4:
        print("Usage: %s inputpath1 inputpath2 outputpath" % args[0])
        sys.exit()

    timed(merge_h5, args[1], args[2], args[3])
Ejemplo n.º 52
0
    def predict_proba(self, x):
        # softmax: exponential scores normalized to sum to 1 over classes
        return l1_norm(exp(self.θ @ atleast_2d(x).T), axis=0)

    def predict(self, x):
        # return the digit with the highest predicted probability
        return argmax(self.predict_proba(x), axis=0)

    def test(self, x, y):
        with timed('predict'):
            y_pred = self.predict(x)
        print('========== accuracy_score = {}'.format(accuracy_score(y, y_pred)))
        print('========== confusion_matrix:')
        print(confusion_matrix(y, y_pred))


if __name__ == '__main__':
    with timed('prepare data'):
        (X_train, y_train), (X_test, y_test) = mnist.load_data()
        # add an intercept column so the model can learn a bias term
        X_train = append_bias(X_train)
        X_test = append_bias(X_test)
        # keep the dataset size comparable to softmax_wendesi
        X_train, y_train = sample_dataset(X_train, y_train, n=28140, seed=66)
        X_test, y_test = sample_dataset(X_test, y_test, n=13860, seed=66)

    model = Softmax()
    with timed('fit'):
        # inf also shows up with η=1e-3
        model.fit(X_train, y_train, n_epoch=100, η=1e-4)

    model.test(X_test, y_test)
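The inf mentioned in the fit comment comes from exp() overflowing for large values of θ @ x. The standard remedy (which this model does not apply) is to subtract the per-column maximum before exponentiating; the normalized probabilities are unchanged because the shift cancels in the ratio:

import numpy as np

def stable_softmax(logits, axis=0):
    # exp(z - max(z)) / sum(exp(z - max(z))) == exp(z) / sum(exp(z)),
    # but the largest exponent is 0, so exp never overflows
    shifted = logits - np.max(logits, axis=axis, keepdims=True)
    e = np.exp(shifted)
    return e / np.sum(e, axis=axis, keepdims=True)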
Ejemplo n.º 53
0
    def load(self):
        return timed(self.data_source.load, self.globals_def, self.entities_map)
Ejemplo n.º 54
0
    def run(self, run_console=False):
        start_time = time.time()
        h5in, h5out, globals_data = timed(self.data_source.run,
                                          self.globals_def,
                                          entity_registry,
                                          self.start_period - 1)

        if config.autodump or config.autodiff:
            if config.autodump:
                fname, _ = config.autodump
                mode = 'w'
            else:  # config.autodiff
                fname, _ = config.autodiff
                mode = 'r'
            fpath = os.path.join(config.output_directory, fname)
            h5_autodump = tables.openFile(fpath, mode=mode)
            config.autodump_file = h5_autodump
        else:
            h5_autodump = None

#        input_dataset = self.data_source.run(self.globals_def,
#                                             entity_registry)
#        output_dataset = self.data_sink.prepare(self.globals_def,
#                                                entity_registry)
#        output_dataset.copy(input_dataset, self.start_period - 1)
#        for entity in input_dataset:
#            indexed_array = build_period_array(entity)

        # tell numpy we do not want warnings for x/0 and 0/0
        np.seterr(divide='ignore', invalid='ignore')

        process_time = defaultdict(float)
        period_objects = {}

        def simulate_period(period_idx, period, processes, entities,
                            init=False):
            print("\nperiod", period)
            if init:
                for entity in entities:
                    print("  * %s: %d individuals" % (entity.name,
                                                      len(entity.array)))
            else:
                print("- loading input data")
                for entity in entities:
                    print("  *", entity.name, "...", end=' ')
                    timed(entity.load_period_data, period)
                    print("    -> %d individuals" % len(entity.array))
            for entity in entities:
                entity.array_period = period
                entity.array['period'] = period

            if processes:
                # build context for this period:
                const_dict = {'__simulation__': self,
                              'period': period,
                              'nan': float('nan'),
                              '__globals__': globals_data}

                num_processes = len(processes)
                for p_num, process_def in enumerate(processes, start=1):
                    process, periodicity = process_def

                    print("- %d/%d" % (p_num, num_processes), process.name,
                          end=' ')
                    print("...", end=' ')
                    if period_idx % periodicity == 0:
                        elapsed, _ = gettime(process.run_guarded, self,
                                             const_dict)
                    else:
                        elapsed = 0
                        print("skipped (periodicity)")

                    process_time[process.name] += elapsed
                    if config.show_timings:
                        print("done (%s elapsed)." % time2str(elapsed))
                    else:
                        print("done.")
                    self.start_console(process.entity, period,
                                       globals_data)

            print("- storing period data")
            for entity in entities:
                print("  *", entity.name, "...", end=' ')
                timed(entity.store_period_data, period)
                print("    -> %d individuals" % len(entity.array))
#            print " - compressing period data"
#            for entity in entities:
#                print "  *", entity.name, "...",
#                for level in range(1, 10, 2):
#                    print "   %d:" % level,
#                    timed(entity.compress_period_data, level)
            period_objects[period] = sum(len(entity.array)
                                         for entity in entities)

        try:
            simulate_period(0, self.start_period - 1, self.init_processes,
                            self.entities, init=True)
            main_start_time = time.time()
            periods = range(self.start_period,
                            self.start_period + self.periods)
            for period_idx, period in enumerate(periods):
                period_start_time = time.time()
                simulate_period(period_idx, period,
                                self.processes, self.entities)
                time_elapsed = time.time() - period_start_time
                print("period %d done" % period, end=' ')
                if config.show_timings:
                    print("(%s elapsed)." % time2str(time_elapsed))
                else:
                    print()

            total_objects = sum(period_objects[period] for period in periods)
            total_time = time.time() - main_start_time
            try:
                ind_per_sec = str(int(total_objects / total_time))
            except ZeroDivisionError:
                ind_per_sec = 'inf'

            print("""
==========================================
 simulation done
==========================================
 * %s elapsed
 * %d individuals on average
 * %s individuals/s/period on average
==========================================
""" % (time2str(time.time() - start_time),
       total_objects / self.periods,
       ind_per_sec))

            show_top_processes(process_time, 10)
#            if config.debug:
#                show_top_expr()

            if run_console:
                c = console.Console(self.console_entity, periods[-1],
                                    self.globals_def, globals_data)
                c.run()

        finally:
            if h5in is not None:
                h5in.close()
            h5out.close()
            if h5_autodump is not None:
                h5_autodump.close()