Example #1
def generate_distance_matrix(pickup_grid, delivery_grid, from_file=None):
  '''
  distance_matrix[job1id].keys() are all the neighbors of job1
  distance_matrix[job1id][job2id] = total distance of end(job1) -> start(job2) -> end(job2)
  '''
  if from_file:
    with open(from_file) as f:
      return json.load(f)

  distance_matrix = collections.defaultdict(dict)
  
  print('Building matrix...')
  progress, total = 0, sum(len(v) for v in delivery_grid.values())
  print(total)

  for (start_lat, start_lng) in delivery_grid:
    neighbors = util.neighbors(pickup_grid, start_lat, start_lng, k=2)
    for job1 in delivery_grid[(start_lat, start_lng)]:
      util.print_progress(progress, total)
      for job2 in neighbors:
        if job2['pickupDate'] < job1['pickupDate']: continue

        d1 = util.interjob_distance(job1, job2)
        d2 = util.intrajob_distance(job2)

        distance_matrix[job1['_id']['$oid']][job2['_id']['$oid']] = d1 + d2

      progress += 1
  
  with open(os.path.join('project_data', 'distance_matrix.json'), 'w') as out:
    json.dump(distance_matrix, out) # Dump it so that we don't have to run this entire procedure again
  
  return distance_matrix
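A minimal usage sketch of the function above (the lookup pattern is an assumption about how the matrix is consumed; pickup_grid, delivery_grid, job1 and job2 are placeholders):

# Hypothetical usage: build the matrix once (or reload it from the cached JSON),
# then look up the chaining cost end(job1) -> start(job2) -> end(job2).
distance_matrix = generate_distance_matrix(pickup_grid, delivery_grid,
                                           from_file='project_data/distance_matrix.json')
job1_id = job1['_id']['$oid']
job2_id = job2['_id']['$oid']
cost = distance_matrix.get(job1_id, {}).get(job2_id)  # None if job2 is not a feasible neighbor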
Example #2
def download_asset(temp_dir: str, url: str, filename: str):
    """Downloads a file in 512 byte chunks from a GitHub octet-stream.

    :param temp_dir: Temporary directory to be used
    :type temp_dir: str
    :param url: URL to the octet-stream API call
    :type url: str
    :param filename: Name of the downloaded file
    :type filename: str
    """

    logging.info("Sending asset download request...")
    response = requests.send_request(url, "application/octet-stream")
    response_size = int(response.headers['content-length'])

    logging.debug(f"Started downloading {response_size} bytes...")

    with open(os.path.join(temp_dir, filename), "wb") as file:
        bytes_read = 0

        chunk_size = 512
        for chunk in response.iter_content(chunk_size):
            file.write(chunk)

            bytes_read += len(chunk)  # count actual bytes read, not the nominal chunk size
            util.print_progress(bytes_read, response_size, suffix="Downloaded")

    logging.info(f"Successfully finished downloading {filename}")
    return True
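Most examples on this page assume a counter-style helper with roughly the signature print_progress(iteration, total, prefix='', suffix=''). The project utilities themselves are not shown here; the following is only a sketch of what such a helper commonly looks like, not the actual util.print_progress used above:

import sys

def print_progress(iteration, total, prefix='', suffix='', length=40):
    """Render a single-line progress bar, e.g.:  prefix |####----|  50.0% suffix"""
    fraction = min(float(iteration) / total, 1.0) if total else 1.0
    filled = int(length * fraction)
    bar = '#' * filled + '-' * (length - filled)
    sys.stdout.write('\r%s |%s| %5.1f%% %s' % (prefix, bar, 100 * fraction, suffix))
    if iteration >= total:
        sys.stdout.write('\n')
    sys.stdout.flush()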
Example #3
def migrate_users(old_db, new_db):

    old_user_coll = old_db["user"]
    new_user_coll = new_db["user"]
    old_users = old_user_coll.find()
    user_migrates = {}
    total_user_count = old_user_coll.count_documents({})
    processed_count = 0
    migrated_count = 0
    skipped_count = 0
    failed_usernames = []

    for old_user in old_users:
        result = migrate_user(old_user, new_user_coll)
        if "failed" in result:
            failed_usernames.append(result["username"])
        else:
            new_user_id = result["_id"]
            user_migrates[old_user["_id"]] = new_user_id
            if result["existed"]:
                skipped_count += 1
            else:
                migrated_count += 1
        processed_count += 1
        print_progress(processed_count,
                       total_user_count,
                       prefix="Migrating users...")

    failed_count = len(failed_usernames)
    print(
        f"Total: {total_user_count}, migrated: {migrated_count}, skipped: {skipped_count}, failed: {failed_count}"
    )
    if failed_count > 0:
        print(f"Failed usernames: {failed_usernames}")
    return user_migrates
Example #4
    def load_invoices_and_map_labels(self,
                                     data_path: str,
                                     autoload=False,
                                     verbose: bool = False):
        # This tuple gives the number of pages to OCR for each invoice, as (first_n, last_n).
        # E.g. (2, 1) means: OCR the first 2 pages and the last page.
        RANGE_OF_PAGES_FOR_OCR = (2, 2)
        invoices = []
        pdf_list = list(
            filter(lambda file_name: file_name.endswith(".pdf"),
                   os.listdir(data_path)))
        for index, filename in enumerate(pdf_list):
            invoice = Invoice(data_path + "/" + filename)

            if autoload:
                loaded = invoice.load_data()
                if loaded:
                    invoices.append(invoice)
                else:
                    raise Exception("Autoloading of invoice",
                                    invoice.readable_name,
                                    "failed. Check if the savefile exists")

            else:
                # First check if json tags are present. If they aren't, skip this pdf
                if not os.path.exists(data_path + "/" + filename[:-4] +
                                      ".json"):
                    print("Warning: json tags for", filename,
                          "do not exist. Check that they are in the same folder. Skipping this pdf.")
                    continue

                # Next, do OCR for the relevant pages in the invoice
                if verbose:
                    print("\n Processing:", invoice.readable_name)

                if invoice.length() < sum(RANGE_OF_PAGES_FOR_OCR):
                    for page in invoice.pages:
                        page.do_OCR(verbose=verbose)
                else:
                    for page in invoice.pages[:RANGE_OF_PAGES_FOR_OCR[0]]:  # initial pages
                        page.do_OCR(verbose=verbose)
                    for page in invoice.pages[-RANGE_OF_PAGES_FOR_OCR[1]:]:  # last pages
                        page.do_OCR(verbose=verbose)

                # Try mapping labels
                invoice.map_labels(verbose=verbose)
                invoices.append(invoice)
                invoice.save_data()

            if verbose:
                print_progress(index + 1, len(pdf_list), "Loading invoices ")

        return invoices
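A standalone sketch (not part of the class) of how that (first_n, last_n) tuple selects pages, using a plain list as a stand-in for invoice.pages:

RANGE_OF_PAGES_FOR_OCR = (2, 2)
pages = list(range(7))                      # stand-in for a 7-page invoice
if len(pages) < sum(RANGE_OF_PAGES_FOR_OCR):
    selected = pages                        # short invoice: OCR every page
else:
    selected = (pages[:RANGE_OF_PAGES_FOR_OCR[0]] +
                pages[-RANGE_OF_PAGES_FOR_OCR[1]:])
print(selected)                             # [0, 1, 5, 6]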
Example #5
    def _connect(self):
        """spawn the interactive subprocess."""
        if not (hasattr(self, 'cmdline') and self.cmdline):
            # No cmd to run, return a class instance without subprocess process.
            return None

        # open the process, as a subprocess.Popen object.
        self.print_input('Starting interactive-process %s: %s\n' % (self.name, self.cmdline))
        cmd_list = self.cmdline if self.use_shell else shlex.split(self.cmdline)

        for attempt in xrange(self._init_retry + 1):
            if self.use_pty_stdin or self.use_pty_stdout:
                # Use a PTY pseudo terminal, for programs that call tcgetattr, or other cases.
                # with util.SudoPrivilege():   #FIXME: openpty may get 'Out of devices' error w/o sudo
                master, slave = pty.openpty()
            p = subprocess.Popen(cmd_list, bufsize=0,
                                 stdin=slave if self.use_pty_stdin else subprocess.PIPE,
                                 stdout=slave if self.use_pty_stdout else subprocess.PIPE,
                                 stderr=subprocess.PIPE, shell=self.use_shell,
                                 universal_newlines=True)
            self.stdin = os.fdopen(master, 'w', 0) if self.use_pty_stdin else p.stdin
            self.stdout = os.fdopen(master, 'rU', 0) if self.use_pty_stdout else p.stdout
            self.stderr = p.stderr
            if not p:
                self.print_warn('fail to open %s process, "%s".' % (self.name, self.cmdline))

            # change the pipe to non-blocking
            util.tty_nonblocking(self.stdout)
            util.tty_nonblocking(self.stderr)
            # disable echo for cleaner output
            if self._disable_echo:
                util.term_set_echo(self.stdin.fileno(), enable=False)

            self.process = p

            self.peek()
            if self.is_alive():
                # connected
                break
            util.print_progress('Will retry "%s" in 1 sec, %s attempt ...' %
                                (self.cmdline, attempt + 1), color=['cyan'])
            time.sleep(1)

        self.print_input('\n')
        if not self.is_alive():
            self.print_error('fail to connect to %s, "%s", with %d retries.'
                             % (self.name, self.cmdline, self._init_retry))
            return None

        if self._init_flush:
            # set continuous_output to reuse previous flushed output
            self.flush(expect='', hide_output=self.hide_output, continuous_output=True)

        self._had_connect = True
        return self.process
Example #6
def migrate_mangas(old_db, new_db):

  old_manga_coll = old_db["manga"]
  new_manga_coll = new_db["manga"]
  old_mangas = old_manga_coll.find()
  total_manga_count = old_manga_coll.count_documents({})
  migrated_count = 0
  processed_count = 0
  skipped_count = 0
  failed_dmk_ids = []
  manga_migrates = {}

  # Generate the batches
  batch_size = 50
  batches = []
  count = 0
  for old_manga in old_mangas:
    if count == 0:
      batches.append([])
    batches[-1].append(old_manga)
    count += 1
    if count == batch_size:
      count = 0

  # Fetch in batches
  for batch in batches:

    # Generate the threads
    threads = [] # List[Tuple[Manga, Thread]]
    for old_manga in batch:
      thread = migrate_manga(old_manga, new_manga_coll)
      threads.append((old_manga, thread))
      # print_progress(len(threads), total_manga_count, prefix="Spawning threads...")

    # Join all the threads and get the result
    for (old_manga, thread) in threads:
      result = thread.wait()
      processed_count += 1
      if result["failed"]:
        failed_dmk_ids.append(old_manga["dmk_id"])
      else:
        if result["existed"]:
          skipped_count += 1
        else:
          migrated_count += 1
        manga_migrates[old_manga["_id"]] = result
      print_progress(processed_count, total_manga_count, prefix="Migrating mangas...")

  failed_count = len(failed_dmk_ids)
  print(f"Total: {total_manga_count}, Completed: {migrated_count}, Skipped: {skipped_count}, Failed: {failed_count}")
  if failed_count > 0:
    print(f"Failed dmk ids: {failed_dmk_ids}")
  return manga_migrates
Example #7
    def cmd_poll(self, cmd, pattern, reverse=False, sum_value=None, max_times=10, interval=0.1,
                 initial_delay=0, verbose=1, *args, **kwargs):
        """Execute a command multiple times until a specified pattern is in the command output.
        E.g. waiting for a process to complete some time-consuming task by polling a command.

        - cmd: command to be executed.
        - pattern: a regex pattern, once it is found in the command output, polling is completed.
        - reverse: reverse the logic; once the pattern is no longer found, polling is completed.
        - sum_value: if specified, poll till the sum of multiple pattern instances matches the value.
          Note, using both reverse and sum_value is not supported.
        - max_times: specify the maximum times the command should be executed to wait for the pattern.
        - interval: time interval between command execution, specified in fraction number of seconds.
        - initial_delay: time to be delayed before starting the polling.
        - verbose: 0: no log, 1: show output of last poll, 2: show output from all polling iterations.
        - \*args, \*\*kwargs: optional parameters for self.cmd()

        - return: (output, stderr_output, result), outputs of the last execution, and whether the polling
          succeeds in getting expected result.
        """
        o = e = ''
        i = 0
        for i in xrange(max_times + 1):
            if not self.is_alive():
                self.print_error('\n%s not alive, cmd_poll "%s" returned, polled %d times' %
                                 (self.name, cmd, i))
                return o, e, None
            time.sleep(initial_delay if i == 0 else interval)
            o, e, r = self.cmd_search(cmd, pattern=pattern, reverse=reverse, sum_value=sum_value,
                                      verbose=(verbose == 2), *args, **kwargs)
            if r:
                break
            progress = ('cmd_poll: time spent %.2f seconds, (delay %s, interval %s, %s times), result %s ...' %
                        (i * interval + initial_delay, initial_delay, interval, i, r))
            if verbose == 2:
                self.print_input('%s\n' % progress)
            elif verbose == 1:
                util.print_progress(progress, color=['cyan'])
        else:
            # did not get the expected polling result
            r = False
        util.print_progress('')
        if verbose:
            self.print_input('%s cmd_poll "%s": waited %.2f seconds, result %s\n'
                             % (self.name, cmd, i * interval + initial_delay, r))
        if not r:
            self.print_error('%s cmd_poll failed %sto get "%s", waited %.2f seconds' %
                             (self.name, 'NOT ' if reverse else '', pattern, i * interval + initial_delay))
        if verbose == 1 or (verbose == 0 and not r):
            self.print_output('\n%s\n' % o)
            self.print_stderr('%s\n' % e)
        return o, e, r
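Based on the docstring above, a hypothetical call could look like the following (session stands in for an instance of the interactive-shell class; the command and pattern are placeholders):

# Poll `jobs` every 0.5 s, at most 20 times, until "Done" shows up in its output.
out, err, ok = session.cmd_poll('jobs', pattern=r'Done', max_times=20, interval=0.5)
if not ok:
    print('polling timed out; last output was:\n%s' % out)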
Example #8
def evaluate_network(network, target, target_host, number):
    # connect to remote device
    tracker = tvm.rpc.connect_tracker(args.host, args.port)
    remote = tracker.request(args.rpc_key)

    print_progress(network)
    net, params, input_shape, output_shape = get_network(network, batch_size=1)

    print_progress("%-20s building..." % network)
    with nnvm.compiler.build_config(opt_level=3):
        graph, lib, params = nnvm.compiler.build(net,
                                                 target=target,
                                                 target_host=target_host,
                                                 shape={'data': input_shape},
                                                 params=params,
                                                 dtype=dtype)

    tmp = tempdir()
    if 'android' in str(target):
        from tvm.contrib import ndk
        filename = "%s.so" % network
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "%s.tar" % network
        lib.export_library(tmp.relpath(filename))

    # upload library and params
    print_progress("%-20s uploading..." % network)
    ctx = remote.context(str(target), 0)
    remote.upload(tmp.relpath(filename))
    rparams = {k: tvm.nd.array(v, ctx) for k, v in params.items()}

    rlib = remote.load_module(filename)
    module = runtime.create(graph, rlib, ctx)
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input('data', data_tvm)
    module.set_input(**rparams)

    del rparams

    # evaluate
    print_progress("%-20s evaluating..." % network)
    ftimer = module.module.time_evaluator("run",
                                          ctx,
                                          number=args.number,
                                          repeat=3)
    prof_res = np.array(
        ftimer().results) * 1000  # multiply by 1000 to convert to milliseconds
    print(
        "%-20s %-19s (%s)" %
        (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
Example #9
def geometric_voting_abs(boxes, km_pp, r_tol, d_tol, wmax, verb=False):
    global radius_candidates, rtol, dtol, centers, vote_db, kmpp, w_max, relative_search, verbose
    relative_search = False
    vote_db = {}
    radius_candidates = {}
    dtol = d_tol
    rtol = r_tol
    w_max = wmax
    kmpp = km_pp
    n = boxes.shape[0]
    radii = np.zeros(n)
    center_points = np.zeros((n, 2))

    # calculate all radii and center points
    for i, box in enumerate(boxes):
        radii[i] = util.calculate_radius(box) * kmpp
        center_points[i] = util.calculate_center(box)
    # get sorting vector
    sort = radii.argsort()[::-1]
    # apply the sort (descending radius)
    radii = radii[sort]
    center_points = center_points[sort]
    center_points *= kmpp
    centers = center_points
    # get the radius candidates
    for c, r in enumerate(radii):
        radius_candidates[c] = pdb.get_craters_by_real_radius(r, r_tol)
    tuples = list(combos(np.arange(n), 2))
    t = len(tuples)
    intersect = False
    for count, (ci, cj) in enumerate(tuples):
        util.print_progress(count, t, 'Pairs loop', 'complete')
        ci_candidates, cj_candidates = search_pair(ci, cj, False)
        vote_db = add_votes_to_db(vote_db, ci, ci_candidates)
        vote_db = add_votes_to_db(vote_db, cj, cj_candidates)
        if not intersect:
            vote_db = add_votes_to_db(vote_db, ci, radius_candidates[ci])
            vote_db = add_votes_to_db(vote_db, cj, radius_candidates[cj])
    # check_db()
    return do_final_validation()
Example #10
def evaluate_network(network, target, target_host, dtype, repeat):
    # connect to remote device
    tracker = tvm.rpc.connect_tracker(args.host, args.port)
    remote = tracker.request(args.rpc_key)

    print_progress(network)
    net, params, input_shape, output_shape = get_network(network,
                                                         batch_size=1,
                                                         dtype=dtype)

    print_progress("%-20s building..." % network)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(net,
                          target=target,
                          target_host=target_host,
                          params=params)

    tmp = tempdir()
    if "android" in str(target) or "android" in str(target_host):
        from tvm.contrib import ndk

        filename = "%s.so" % network
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "%s.tar" % network
        lib.export_library(tmp.relpath(filename))

    # upload library and params
    print_progress("%-20s uploading..." % network)
    dev = remote.device(str(target), 0)
    remote.upload(tmp.relpath(filename))

    rlib = remote.load_module(filename)
    module = runtime.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # evaluate
    print_progress("%-20s evaluating..." % network)
    ftimer = module.module.time_evaluator("run", dev, number=1, repeat=repeat)
    prof_res = np.array(
        ftimer().results) * 1000  # multiply by 1000 to convert to milliseconds
    print(
        "%-20s %-19s (%s)" %
        (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
Example #11
def evaluate_network(network, target, target_host, dtype, repeat):
    # connect to remote device
    tracker = tvm.rpc.connect_tracker(args.host, args.port)
    remote = tracker.request(args.rpc_key)

    print_progress(network)
    net, params, input_shape, output_shape = get_network(network, batch_size=1, dtype=dtype)

    print_progress("%-20s building..." % network)
    with nnvm.compiler.build_config(opt_level=3):
        graph, lib, params = nnvm.compiler.build(
            net, target=target, target_host=target_host,
            shape={'data': input_shape}, params=params, dtype=dtype)

    tmp = tempdir()
    if 'android' in str(target) or 'android' in str(target_host):
        from tvm.contrib import ndk
        filename = "%s.so" % network
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "%s.tar" % network
        lib.export_library(tmp.relpath(filename))

    # upload library and params
    print_progress("%-20s uploading..." % network)
    ctx = remote.context(str(target), 0)
    remote.upload(tmp.relpath(filename))

    rlib = remote.load_module(filename)
    module = runtime.create(graph, rlib, ctx)
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input('data', data_tvm)
    module.set_input(**params)

    # evaluate
    print_progress("%-20s evaluating..." % network)
    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=repeat)
    prof_res = np.array(ftimer().results) * 1000  # multiply by 1000 to convert to milliseconds
    print("%-20s %-19s (%s)" % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
Example #12
def migrate_follows(old_db, new_db, user_migrates, manga_migrates):
    """
  user_migrates: { [old_user_id]: [new_user_id] }
  manga_migrates: { [old_manga_id]: { "_id": [new_manga_id], "dmk_id": [dmk_id] } }
  """

    old_follow_coll = old_db["follow"]
    new_follow_coll = new_db["follow"]
    old_follows = old_follow_coll.find()

    total_follow_count = old_follow_coll.count_documents({})
    migrated_count = 0
    processed_count = 0
    skipped_count = 0

    failed_follows = []

    for old_follow in old_follows:
        old_user_id = old_follow["user_id"]
        old_manga_id = old_follow["manga_id"]
        try:
            if old_user_id in user_migrates:
                new_user_id = user_migrates[old_user_id]
                if old_manga_id in manga_migrates:
                    manga_dmk_id = manga_migrates[old_manga_id]["dmk_id"]

                    # Check if the follow already exists
                    new_follow = new_follow_coll.find_one({
                        "user_id":
                        ObjectId(new_user_id),
                        "manga_dmk_id":
                        manga_dmk_id
                    })

                    if new_follow:
                        skipped_count += 1
                    else:

                        # Is liked information
                        if "liked" in old_follow:
                            is_liked = old_follow["liked"]
                        else:
                            is_liked = False

                        if "start_date" in old_follow:
                            start_date_time = old_follow["start_date"]
                        else:
                            start_date_time = datetime.datetime.utcnow()

                        # Generate a new follow document
                        new_follow = {
                            "user_id": ObjectId(new_user_id),
                            "manga_dmk_id": manga_dmk_id,
                            "start_date_time": start_date_time,
                            "update_date_time": old_follow["update_date"],
                            "current_episode": old_follow["current_episode"],
                            "max_episode": old_follow["max_episode"],
                            "is_up_to_date": old_follow["up_to_date"],
                            "is_liked": is_liked
                        }

                        # Insert into the new collection
                        new_follow_coll.insert_one(new_follow)
                        migrated_count += 1
                else:
                    failed_follows.append({
                        "old_user_id": old_user_id,
                        "old_manga_id": old_manga_id,
                        "reason": "Manga not found"
                    })
            else:
                failed_follows.append({
                    "old_user_id": old_user_id,
                    "old_manga_id": old_manga_id,
                    "reason": "User not found"
                })
        except Exception as err:
            failed_follows.append({
                "old_user_id": old_user_id,
                "old_manga_id": old_manga_id,
                "reason": str(err)
            })
        finally:
            processed_count += 1
            print_progress(processed_count,
                           total_follow_count,
                           prefix="Migrating follow...")

    failed_count = len(failed_follows)
    print(
        f"Total: {total_follow_count}, migrated: {migrated_count}, skipped: {skipped_count}, failed: {failed_count}"
    )
    if failed_count > 0:
        print(f"Failed follows: {failed_follows}")
Example #13
def encode(encoder, threads, ffmpeg_path, encoder_path, job):
    encoder_params = job.encoder_params
    ffmpeg_params = job.ffmpeg_params

    vfs = [f"select=gte(n\\,{job.start})"]

    if "-vf" in ffmpeg_params:
        idx = ffmpeg_params.index("-vf")
        del ffmpeg_params[idx]
        if idx + 1 < len(ffmpeg_params):
            vfs.append(ffmpeg_params[idx + 1])
            del ffmpeg_params[idx + 1]

    vf = ",".join(vfs)

    output_filename = f"tmp{job.segment}.ivf"
    log_path = f"tmp{job.segment}.log"

    ffmpeg = [
        ffmpeg_path,
        "-y",
        "-loglevel",
        "error",
        "-i",
        job.filename,
        "-strict",
        "-1",
        "-pix_fmt",
        "yuv420p10le",
        "-vsync",
        "0",
        "-vf",
        vf,
        "-vframes",
        job.frames,
    ]

    ffmpeg.extend(ffmpeg_params)
    ffmpeg.extend(["-f", "yuv4mpegpipe", "-"])

    ffmpeg = [str(s) for s in ffmpeg]

    aom = [
        encoder_path,
        "-",
        "--ivf",
        f"--fpf={log_path}",
        f"--threads={threads}",
        f"--passes={job.passes}",
        "-o",
        output_filename,
    ] + encoder_params

    aom = [str(s) for s in aom]

    if job.passes == 2:
        pass1 = [a for a in aom if not a.startswith("--denoise-noise-level")]
        passes = [
            pass1 + ["--pass=1"],
            aom + ["--pass=2"],
        ]
    else:
        passes = [aom]

    # if job.grain_table:
    #  if not job.has_grain:
    #    return False, None
    #  else:
    #    passes[-1].append(f"--film-grain-table={job.grain}")

    total_frames = job.frames

    ffmpeg_pipe = None

    try:
        for pass_n, cmd in enumerate(passes, start=1):
            ffmpeg_pipe = subprocess.Popen(
                ffmpeg,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            job.pipe = subprocess.Popen(
                cmd,
                stdin=ffmpeg_pipe.stdout,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                universal_newlines=True,
            )

            job.progress = (pass_n, 0)
            job.update_status("{:.3s}".format(encoder), "pass:", pass_n)

            # The body of this loop was redacted in the source ("******");
            # what follows is an approximate reconstruction from the surrounding code.
            output = []
            for line in job.pipe.stdout:
                output.append(line.rstrip())
                match = re.search(r"frame.*?\/([^ ]+?) ", line)
                if match:
                    frames = int(match.group(1))
                    job.progress = (pass_n, frames)
                    job.update_progress()
                    job.update_status("{:.3s}".format(encoder), "pass:", pass_n,
                                      "frame:", frames, "/", total_frames)

            job.pipe.wait()
            if job.pipe.returncode != 0:
                raise Exception("\n".join(output))

    except JobStopped:
        logging.info("Job stopped")
        return None
    except:
        logging.error(traceback.format_exc())
        return None
    finally:
        if ffmpeg_pipe:
            ffmpeg_pipe.kill()
        job.pipe.kill()

        if os.path.isfile(log_path):
            try:
                os.remove(log_path)
            except:
                pass

    return output_filename
Example #14
def add_missing_columns(con, table_name, db_schema_str, site_url_visit_id_map):
    col_to_replace = None
    existing_columns = get_column_names_from_db_schema_str(
        table_name, db_schema_str)
    if "top_url" in existing_columns:
        col_to_replace = "top_url"
    elif "page_url" in existing_columns:
        col_to_replace = "page_url"

    # column names from the up to date DB schema
    new_columns = get_column_names_from_create_query(TABLE_SCHEMAS[table_name])
    if new_columns == existing_columns:
        # print "No missing columns to add to", table_name
        return False
    print "Will add missing columns to %s: %s" % (
        table_name, set(new_columns).difference(set(existing_columns)))

    processed = 0
    data_to_insert = []
    num_rows = con.execute("SELECT MAX(id) FROM %s" % table_name).fetchone()[0]
    # Copy the existing table to a temp table
    tmp_table_name = "_%s_old" % table_name
    con.execute("ALTER TABLE %s RENAME TO %s;" % (table_name, tmp_table_name))

    # create table with the most recent schema
    con.execute(TABLE_SCHEMAS[table_name])

    # only keep the columns that also appear in the new table schema
    common_columns = [
        column for column in existing_columns if column in new_columns
    ]

    t0 = time()
    # replace top_url and page_url columns with visit_id
    if col_to_replace:
        print "Will replace %s with visit_id" % col_to_replace
        assert "visit_id" not in existing_columns
        # select from columns that are common to old and new table schemas
        # col_to_replace is either top_url or page_url
        # we use it to look up the visit_id
        cols_to_select = common_columns + [
            col_to_replace,
        ]
        cols_to_insert = common_columns + [
            "visit_id",
        ]
        stream_qry = "SELECT %s FROM %s " % (",".join(cols_to_select),
                                             tmp_table_name)
        # print "Will iterate over", stream_qry
        insert_qry = "INSERT INTO %s (%s) VALUES (%s)" % (table_name, ",".join(
            cols_to_insert), ",".join("?" * len(cols_to_insert)))

        for row in con.execute(stream_qry):
            try:
                visit_id = site_url_visit_id_map[row[col_to_replace]]
            except Exception:
                print "Warning: Missing visit id", col_to_replace, row
                continue
            row = list(row)
            row.pop()  # remove col_to_replace, we don't need it anymore
            row.append(visit_id)  # add visit_id
            data_to_insert.append(row)
            # print "Will execute %s" % qry
            # con.execute(qry, row)
            processed += 1
            if processed % 100000 == 0:
                con.executemany(insert_qry, data_to_insert)
                del data_to_insert[:]
            print_progress(t0, processed, num_rows)
        con.executemany(insert_qry, data_to_insert)
    else:
        # read from the temp table and write into the new table
        stream_qry = "SELECT %s FROM %s " % (",".join(common_columns),
                                             tmp_table_name)
        # print "Will iterate over", stream_qry
        insert_qry = "INSERT INTO %s (%s) VALUES (%s)" % (table_name, ",".join(
            common_columns), ",".join("?" * len(common_columns)))
        for row in con.execute(stream_qry):
            data_to_insert.append(row)
            # print "Will execute %s" % qry
            # con.execute(insert_qry, row)
            processed += 1
            if processed % 100000 == 0:
                con.executemany(insert_qry, data_to_insert)
                del data_to_insert[:]
            print_progress(t0, processed, num_rows)
        con.executemany(insert_qry, data_to_insert)
    t0 = time()
    print "Will drop the temp table",
    con.execute("DROP TABLE %s" % tmp_table_name)
    print "(took", time() - t0, "s)"
    con.commit()
    return True
Example #15
def grad_and_mass(rows, cols, mask=None, skip=None):
    """
    Returns a gradient operator matrix G for a grid of dimensions
        'rows' by 'cols',
    a corresponding mass matrix M such that L = G.T*M*G is a
    Laplacian operator matrix that is symmetric and normalized such that the
    diagonal values of interior vertices equal 1,
    and a skip matrix S such that the gradient entries of S*G*x are zero for
    (i,j) such that skip[i,j] is False. If skip is None, all entries are
    assumed to be True and S will be the identity matrix.

    Optional parameter `mask` will result in a gradient operator that entirely
    ignores (i,j) such that mask[i,j] is False.

    In other words, `mask` should be True for every grid location
    you care about (want a solution to via L = G.T*M*G).
    `skip` should be True for every grid location you have a known good value
    for.

    Matrices returned are scipy.sparse matrices.
    """

    print_progress(0)

    assert rows > 0 and cols > 0

    if mask is not None:
        mask = asarray(mask, dtype=bool)
        assert mask.shape == (rows, cols)

    if skip is not None:
        skip = asarray(skip, dtype=bool)
        assert skip.shape == (rows, cols)

    # The number of derivatives in the +row direction is cols * (rows - 1),
    # because the bottom row doesn't have them.
    num_Grow = cols * (rows - 1)
    # The number of derivatives in the +col direction is rows * (cols - 1),
    # because the right-most column doesn't have them.
    num_Gcol = rows * (cols - 1)

    # Gradient matrix
    gOnes = numpy.ones(num_Grow + num_Gcol)
    vals = numpy.append(-gOnes, gOnes)
    del gOnes

    gColRange = numpy.arange(rows * cols)
    gColRange = gColRange[~(gColRange % cols == (cols - 1))]
    colJ = numpy.concatenate([
        numpy.arange(num_Grow), gColRange,
        numpy.arange(cols, num_Grow + cols), gColRange + 1
    ])
    del gColRange

    # Skip matrix
    if (skip is not None):
        S_diag = numpy.append(skip[:-1] & skip[1:],
                              skip[:, :-1] & skip[:, 1:]).astype(int)
    else:
        S_diag = numpy.ones(num_Grow + num_Gcol)

    # Mass diagonal matrix
    if (mask is not None):
        m = numpy.zeros((rows - 1, cols))
        m[:, 1:][mask[:-1, :-1] & mask[1:, :-1]] += 0.125
        m[:, :-1][mask[:-1, 1:] & mask[1:, 1:]] += 0.125
        mass = m.flatten()
        m = numpy.zeros((rows, cols - 1))
        m[1:][mask[:-1, :-1] & mask[:-1, 1:]] += 0.125
        m[:-1][mask[1:, :-1] & mask[1:, 1:]] += 0.125
        mass = numpy.append(mass, m)
    else:
        m = numpy.hstack([
            numpy.full((rows - 1, 1), 0.125),
            numpy.full((rows - 1, cols - 2), 0.25),
            numpy.full((rows - 1, 1), 0.125)
        ])
        mass = m.flatten()
        m = numpy.vstack([
            numpy.full((1, cols - 1), 0.125),
            numpy.full((rows - 2, cols - 1), 0.25),
            numpy.full((1, cols - 1), 0.125)
        ])
        mass = numpy.append(mass, m.flatten())
    del m
    output_row = num_Grow + num_Gcol

    if (mask is not None):
        keep_rows = numpy.append(mask[:-1] & mask[1:],
                                 mask[:, :-1] & mask[:, 1:])
        tiled_keep_rows = numpy.tile(keep_rows, 2)
        vals = vals[tiled_keep_rows]
        colJ = colJ[tiled_keep_rows]
        S_diag = S_diag[keep_rows]
        mass = mass[keep_rows]
        output_row = numpy.count_nonzero(keep_rows)

    # rowI is dependent on the number of output rows.
    rowI = numpy.tile(numpy.arange(output_row), 2)

    G = sparse.coo_matrix((vals, (rowI, colJ)),
                          shape=(output_row, rows * cols))
    assert G.shape == (output_row, rows * cols)

    M = coo_diag(mass)
    assert M.shape == (output_row, output_row)

    S = coo_diag(S_diag)
    assert S.shape == (output_row, output_row)

    print_progress(1.0)
    print()

    return G, M, S
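A small usage sketch for the function above (assuming numpy, scipy.sparse, and the coo_diag helper it relies on are importable). The diagonal check follows directly from the normalization stated in the docstring:

# Hypothetical usage on a 4x5 grid: assemble the Laplacian L = G.T * M * G.
# skip is None here, so S is the identity and can be ignored.
rows, cols = 4, 5
G, M, S = grad_and_mass(rows, cols)
L = G.T @ (M @ G)                            # sparse (rows*cols) x (rows*cols) Laplacian
interior = (rows // 2) * cols + (cols // 2)  # index of an interior grid vertex
print(L.tocsr()[interior, interior])         # ~1.0 for interior vertices, per the docstring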
Example #16
    if args.network is None:
        networks = ['squeezenet_v1.1', 'mobilenet', 'resnet-18', 'vgg-16']
    else:
        networks = [args.network]

    target = tvm.target.arm_cpu(model=args.device)

    # connect to remote device
    tracker = tvm.rpc.connect_tracker(args.host, args.port)
    remote = tracker.request(args.rpc_key)

    print("--------------------------------------------------")
    print("%-20s %-20s" % ("Network Name", "Mean Inference Time (std dev)"))
    print("--------------------------------------------------")
    for network in networks:
        print_progress(network)
        net, params, input_shape, output_shape = get_network(network,
                                                             batch_size=1)

        print_progress("%-20s building..." % network)
        with nnvm.compiler.build_config(opt_level=2,
                                        add_pass=['AlterOpLayout']):
            graph, lib, params = nnvm.compiler.build(
                net,
                target=target,
                shape={'data': input_shape},
                params=params,
                dtype=dtype)

        tmp = tempdir()
        if 'android' in str(target):