Example #1
def cache_download(url, filename=None):
    """ return downloaded filepath """
    # check cache
    if not filename:
        filename = os.path.basename(url)
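    # cache layout (as built below): <appdir>/<sha224 hex digest of the url>/<filename>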
    storepath = os.path.join(appdir,
                             hashlib.sha224(url.encode()).hexdigest(),
                             filename)
    storedir = os.path.dirname(storepath)
    if not os.path.isdir(storedir):
        os.makedirs(storedir)
    if os.path.exists(storepath) and os.path.getsize(storepath) > 0:
        return storepath
    # download from url
    r = requests.get(url, stream=True)
    if r.status_code != 200:
        raise Exception(url, "status code", r.status_code)
    file_size = int(r.headers.get("Content-Length"))
    bar = DownloadBar(filename, max=file_size)
    with open(storepath + '.tmp', 'wb') as f:
        chunk_length = 16 * 1024
        while 1:
            buf = r.raw.read(chunk_length)
            if not buf:
                break
            f.write(buf)
            bar.next(len(buf))
        bar.finish()
    shutil.move(storepath + '.tmp', storepath)
    return storepath
Example #2
def load_img_dataset(my_dir, downscaling, color):
    # Count files in directory
    data_files = [f for f in os.listdir(my_dir) if (f[0:5] == 'shape')]
    data_files = sorted(data_files)
    n_imgs = len(data_files)
    print('I found {} images'.format(n_imgs))

    # Check size of first image
    img = get_img(my_dir + '/' + data_files[0])
    height = img.shape[0]
    width = img.shape[1]

    # Declare n_channels
    if (color == 'bw'): n_channels = 1
    if (color == 'rgb'): n_channels = 3

    # Compute downscaling and allocate array
    height = math.floor(height / downscaling)
    width = math.floor(width / downscaling)
    imgs = np.zeros([n_imgs, height, width, n_channels])

    # Load all images
    bar = progress.bar.Bar('Loading imgs  ', max=n_imgs)
    for i in range(0, n_imgs):
        imgs[i, :, :, :] = load_and_reshape_img(my_dir + '/' + data_files[i],
                                                height, width, color)
        bar.next()
    bar.finish()

    return imgs, n_imgs, height, width, n_channels
Example #3
    def run(self):
        """
            Desc: run ekf
            Input(s):
                none
            Output(s):
                none
        """
        t_odom_prev = 0.0 # initialize previous odom time

        # setup progress bar
        print("running kalman filter, please wait...")
        bar = progress.bar.IncrementalBar('Progress:', max=len(self.times))


        for tt, timestep in enumerate(self.times):
            # predict step for odometry
            if self.odom_df['seconds of week [s]'].isin([timestep]).any():
                dt_odom = timestep - t_odom_prev
                t_odom_prev = timestep
                if not self.initialized_odom:
                    self.initialized_odom = True
                    bar.next()
                else:
                    odom_timestep = self.odom_df[self.odom_df['seconds of week [s]'] == timestep]
                    odom_vel_x = odom_timestep['ECEF_vel_x'].values[0]
                    odom_vel_y = odom_timestep['ECEF_vel_y'].values[0]
                    odom_vel_z = odom_timestep['ECEF_vel_z'].values[0]
                    self.predict_imu(np.array([[odom_vel_x,odom_vel_y,odom_vel_z]]).T,dt_odom)
            # update gnss step
            if self.sat_df['seconds of week [s]'].isin([timestep]).any():
                sat_timestep = self.sat_df[self.sat_df['seconds of week [s]'] == timestep]
                if 'pr [m]' in self.sat_df.columns:
                    pranges = sat_timestep['pr [m]'].to_numpy().reshape(-1,1)
                    sat_x = sat_timestep['sat x ECEF [m]'].to_numpy().reshape(-1,1)
                    sat_y = sat_timestep['sat y ECEF [m]'].to_numpy().reshape(-1,1)
                    sat_z = sat_timestep['sat z ECEF [m]'].to_numpy().reshape(-1,1)
                    sigmas = sat_timestep['Pr_sigma'].to_numpy().reshape(-1,1)
                    time_correction = sat_timestep['idk wtf this is'].to_numpy().reshape(-1,1)
                    self.update_gnss_raw(pranges,sat_x,sat_y,sat_z,sigmas,time_correction)
                else:
                    lat_t = sat_timestep['Latitude'].to_numpy()[0]
                    lon_t = sat_timestep['Longitude'].to_numpy()[0]
                    alt_t = sat_timestep['Altitude'].to_numpy()[0]
                    self.update_gnss(lat_t,lon_t,alt_t)

            # add values to history
            self.mu_history = np.hstack((self.mu_history,self.mu))
            self.P_history.append(np.trace(self.P))
            bar.next() # progress bar


        bar.finish() # end progress bar
        # if the history ended up one entry longer than the number of timesteps, drop the extra entry
        if len(self.times) + 1 == self.mu_history.shape[1]:
            self.mu_history = self.mu_history[:,:-1]
            self.P_history = self.P_history[:-1]
Example #4
def update_title_progress(tags_dictionary):
    count = len(tags_dictionary)
    bar = progress.bar.FillingSquaresBar(
        'Processing', max=count, suffix='%(index)d/%(max)d - %(percent).1f%%')
    for tag in range(count):
        update_title(tags_dictionary)
        count -= 1
        bar.next()
    bar.finish()
Example #5
def send_emails(missed_students, teacher_objects, sacs_address):
    """Function that generates emails to each teacher who has students listed in their una attribute."""

    date_today = input(
        "Enter the date for attendance data uploaded:\nDate: ").strip()

    authenticated = True
    while authenticated:
        try:  #try entering a correct username and password; will loop until the user chooses to quit or is able to authenticate
            username = input("Username: ")
            password = input("Password: ")  # credential prompts are redacted in the source
            # ... (elided in the source: timer start, SMTP connection/login, progress bar
            #      setup, and the per-teacher send loop that printed "Sending..." per email) ...
                #count += 1

            bar.finish()

            smtpObj.quit()

            for student in missed_students:
                print("Line {}. No teacher email on file for {} {} {}.".format(
                    student[3], student[0], student[1], student[2]))
            if missed_students != []:
                print(
                    "\nBe sure to follow up with this teacher or these teachers individually."
                )

            authenticated = False

            toc = time.time()  #end time for program execution

            print(
                "Program execution time:", round(toc - tic, 0), "seconds"
            )  #print the time taken to send all emails, rounded to the nearest second

        except smtplib.SMTPAuthenticationError:
            print("Looks like your username or password was incorrect.")
            smtpObj.quit()
Example #6
    def toCSV(self, filepath):
        with open(filepath, 'w', newline='') as fobj:
            writer = csv.writer(fobj)
            nodes = self.traverse(mode='in')

            bar = progress.bar.Bar('Exporting tree to CSV', max=len(nodes))
            for node in nodes:
                writer.writerow(node.data)
                bar.next()
            bar.finish()
Example #7
def export_all(subjects, folder):
    assert folder[:1] == "/"
    logging.info("Exporting subjects...")
    suffix_format = "%(index)d/%(max)d [%(elapsed_td)s / %(eta_td)s]"
    bar = progress.bar.Bar("Exporting subjects",
                           max=len(subjects),
                           suffix=suffix_format)
    for subject in subjects:
        export_subject(subject, folder)
        bar.next()
    bar.finish()
Example #8
    def fromJSON(self, filepath):
        with open(filepath, 'r') as fil:
            data = json.load(fil)

        bar = progress.bar.Bar('Inserting JSON', max=len(data))
        for key, value in data.items():
            cargo = (key, value)
            self.insert(data=cargo)
            bar.next()
        bar.finish()
Example #9
def main(stack_name):
    client = docker.from_env()

    # Docker Python API doesn't seem to support listing services and stacks, so we have to do
    # some nasty shell parsing

    try:
        output = subprocess.check_output(
            ['/usr/bin/docker', 'stack', 'list', '--format', '{{.Name}}'])
    except subprocess.CalledProcessError as e:
        print("Cannot list stacks")
        print(e.output)
        exit(1)

    if stack_name not in output.decode('utf-8').split('\n'):
        print("Stack {} not found".format(stack_name))

    try:
        output = subprocess.check_output([
            '/usr/bin/docker', 'stack', 'services', stack_name, '--format',
            '{{.Name}}'
        ])
    except subprocess.CalledProcessError as e:
        print("Cannot find services for stack {}".format(stack_name))
        print(e.output)
        exit(1)

    service_names = output.decode('utf-8').split('\n')

    services = [
        Service(x) for x in client.services.list() if x.name in service_names
    ]

    bar = progress.bar.Bar("Deploying", max=len(services))

    while True:
        for s in services:
            s.update()

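        # jump the bar straight to the number of services that have finished converging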
        bar.index = len(list(filter(lambda x: x.is_complete(), services)))
        bar.update()

        if all([s.is_complete() for s in services]):
            bar.finish()
            for s in services:
                print("{} - {}".format(s.name, s.get_state()))

            if all([s.success for s in services]):
                exit(0)
            else:
                exit(1)

        time.sleep(1)
Example #10
def download(url, target):
    print("Download", target)
    r = requests.get(url, stream=True)
    r.raise_for_status()

    bar = progress.bar.Bar()
    bar.max = int(r.headers.get("content-length"))
    with open(target, "wb") as f:
        for chunk in r.iter_content(chunk_size=4096):
            f.write(chunk)
            bar.next(len(chunk))
        bar.finish()
Example #11
def download(url: str, storepath: str):
    r = requests.get(url, stream=True)
    r.raise_for_status()
    file_size = int(r.headers.get("Content-Length"))

    bar = DownloadBar(storepath, max=file_size)
    chunk_length = 16 * 1024
    with open(storepath + '.part', 'wb') as f:
        for buf in r.iter_content(chunk_length):
            f.write(buf)
            bar.next(len(buf))
        bar.finish()
    shutil.move(storepath + '.part', storepath)
Example #12
def checkRecall(UPC):
    '''
    Parses CFIA recalls for UPC. Returns recall link if found, else none.
    '''

    cfia_url = 'https://www.inspection.gc.ca/food-recall-warnings-and-allergy-alerts/eng/1351519587174/1351519588221'

    soup = bs4.BeautifulSoup(urllib.request.urlopen(cfia_url), 'html.parser')
    table = soup.find('tbody')

    recall_urls = []
    rows = table.findChildren('tr')
    for row in rows:
        recall_url = row.find('a')['href']
        recall_url = 'https://www.inspection.gc.ca/' + recall_url
        recall_urls.append(recall_url)

    bar = progress.bar.Bar('Searching CFIA', max=len(recall_urls))
    for recall_url in recall_urls:
        soup = bs4.BeautifulSoup(urllib.request.urlopen(recall_url),
                                 'html.parser')

        table = soup.find('table',
                          attrs={
                              'class': 'table table-bordered table-condensed'
                          }).find('tbody')

        rows = table.findChildren('tr')

        for row in rows:
            if row.findChildren(
                    'th'):  # check if bolded first row header is found
                col = 2
            else:
                col = 3

            UPC_recall = row.findChildren('td')[col].text.strip().replace(
                u'\xa0', '').replace(' ', '')

            if UPC == UPC_recall:
                bar.finish()
                return recall_url

            elif 'Startswith' in UPC_recall:
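                # the literal prefix 'Startswith' is 10 characters long; slice it off
                # to leave the partial UPC that the recall entry starts with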
                UPC_recall_trim = UPC_recall[10:]

                if UPC_recall_trim in UPC:
                    bar.finish()
                    return recall_url

            elif 'Noneor' in UPC_recall:
                UPC_recall = UPC_recall[6:]
                if UPC == UPC_recall:
                    bar.finish()
                    return recall_url

        bar.next()
    bar.finish()

    return None
Example #13
    def run(self):
        """
            Desc: run ekf
            Input(s):
                none
            Output(s):
                none
        """
        t_odom_prev = 0.0 # initialize previous odom time

        # setup progress bar
        print("running kalman filter, please wait...")
        bar = progress.bar.IncrementalBar('Progress:', max=len(self.times))


        for tt, timestep in enumerate(self.times):
            # predict step for odometry
            # if self.odom_df['seconds of week [s]'].isin([timestep]).any():
            #     dt_odom = timestep - t_odom_prev
            #     t_odom_prev = timestep
            #     if tt == 0:
            #         continue
            #     odom_timestep = self.odom_df[self.odom_df['seconds of week [s]'] == timestep]
            #     odom_vel_x = odom_timestep['ECEF_vel_x'].values[0]
            #     odom_vel_y = odom_timestep['ECEF_vel_y'].values[0]
            #     odom_vel_z = odom_timestep['ECEF_vel_z'].values[0]
                # self.predict_imu(np.array([[odom_vel_x,odom_vel_y,odom_vel_z]]).T,dt_odom)

            # update gnss step
            if self.sat_df['seconds of week [s]'].isin([timestep]).any():
                sat_timestep = self.sat_df[self.sat_df['seconds of week [s]'] == timestep]
                pranges = sat_timestep['pr [m]'].to_numpy().reshape(-1,1)
                sat_x = sat_timestep['sat x ECEF [m]'].to_numpy().reshape(-1,1)
                sat_y = sat_timestep['sat y ECEF [m]'].to_numpy().reshape(-1,1)
                sat_z = sat_timestep['sat z ECEF [m]'].to_numpy().reshape(-1,1)
                self.predict_simple()
                self.update_gnss(pranges,sat_x,sat_y,sat_z)

            # add values to history
            self.mu_history = np.hstack((self.mu_history,self.mu))
            self.P_history.append(np.trace(self.P))
            bar.next() # progress bar

        bar.finish() # end progress bar

        self.mu_history = self.mu_history[:,:-1]
        self.mu_history[0,:] += self.x0
        self.mu_history[1,:] += self.y0
        self.mu_history[2,:] += self.z0
        self.P_history = self.P_history[:-1]
Example #14
    def handle(self, *args, **options):

        prefix = options['prefix'][0]
        skip_hidden_dir = True

        l = ManagedFile.objects.all()
        bar = SlowBar(max=l.count())
        for file in l:
            bar.next()
            try:
                if not file.isTracked and file.names.count() < 1:
                    file.delete()
            except:
                print("Skipped file id={}, names={}".format(
                    file.id, list(file.names.all())))
        bar.finish()
Example #15
def load_drag_lift_dataset(my_dir, n_outputs):
    sol_files = sorted(
        [f for f in os.listdir(my_dir) if f.startswith('shape')])
    n_sols = len(sol_files)

    sols = np.zeros([n_sols, n_outputs])
    bar = progress.bar.Bar('Loading labels', max=n_sols)

    for i in range(0, n_sols):
        y = np.loadtxt(my_dir + '/' + sol_files[i], skiprows=1)
        if (n_outputs == 1): sols[i, 0] = y[y.shape[0] - 1, 1]
        if (n_outputs == 2): sols[i, 0:2] = y[y.shape[0] - 1, 1:3]
        bar.next()
    bar.finish()

    return sols, n_sols
Example #16
def cache_download(url,
                   filename=None,
                   timeout=None,
                   storepath=None,
                   logger=logger):
    """ return downloaded filepath """
    # check cache
    if not filename:
        filename = os.path.basename(url)
    if not storepath:
        storepath = gen_cachepath(url)
    storedir = os.path.dirname(storepath)
    if not os.path.isdir(storedir):
        os.makedirs(storedir)
    if os.path.exists(storepath) and os.path.getsize(storepath) > 0:
        logger.debug("Use cached assets: %s", storepath)
        return storepath

    logger.debug("Download %s", url)
    # download from url
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'keep-alive',
        'Origin': 'https://github.com',
        'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    } # yapf: disable
    r = requests.get(url, stream=True, headers=headers, timeout=timeout)
    r.raise_for_status()

    file_size = int(r.headers.get("Content-Length"))
    bar = DownloadBar(filename, max=file_size)
    with open(storepath + '.part', 'wb') as f:
        chunk_length = 16 * 1024
        while 1:
            buf = r.raw.read(chunk_length)
            if not buf:
                break
            f.write(buf)
            bar.next(len(buf))
        bar.finish()

    assert file_size == os.path.getsize(storepath +
                                        ".part")  # may raise FileNotFoundError
    shutil.move(storepath + '.part', storepath)
    return storepath
Example #17
def mergeCSV(inpath, outpath):
    filenames = os.listdir(inpath)

    data = []

    bar = progress.bar.Bar('Merging CSV files', max=len(filenames))
    for filename in filenames:
        with open(inpath + filename, 'r') as fobj:
            reader = csv.reader(fobj)
            data.extend([row for row in reader])
        bar.next()
    bar.finish()

    # export
    with open(outpath, 'w', newline='') as fobj:
        writer = csv.writer(fobj)
        writer.writerows(data)
Example #18
    async def map(cls, data, *, concurrency: int, label: str):
        pool = cls(data, concurrency=concurrency)
        pool._start()

        bar = progress.bar.Bar(label[:15].ljust(15), max=len(data))

        stop_cnt = 0
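        # each worker presumably enqueues the cls._STOP sentinel when it finishes,
        # so the loop below exits once `concurrency` sentinels have been received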
        while True:
            piece = await pool._results.get()
            if piece is cls._STOP:
                stop_cnt += 1
                if stop_cnt == concurrency:
                    bar.finish()
                    return
            elif isinstance(piece, Exception):
                raise piece
            else:
                bar.next()
Example #19
    def handle(self, *args, **options):

        prefix = options['prefix'][0]
        skip_hidden_dir = True
        crit = Q(size=None)
        l = ManagedFile.objects.filter(crit)
        bar = SlowBar(max=l.count())
        for file in l:
            bar.next()
            try:
                stat_t = file.robust_stat()
                file.size = stat_t[stat.ST_SIZE]
                #print("{} size={}".format(,file.size))
                file.save()
            except:
                print("Skipped file id={}, names={}".format(
                    file.id, list(file.names.all())))
        bar.finish()
Example #20
def cache_download(url, filename=None):
    """ return downloaded filepath """
    # check cache
    if not filename:
        filename = os.path.basename(url)
    storepath = os.path.join(appdir,
                             hashlib.sha224(url.encode()).hexdigest(),
                             filename)
    storedir = os.path.dirname(storepath)
    if not os.path.isdir(storedir):
        os.makedirs(storedir)
    if os.path.exists(storepath) and os.path.getsize(storepath) > 0:
        return storepath
    # download from url
    headers = {
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Connection': 'keep-alive',
        'Origin': 'https://github.com',
        'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
    }
    r = requests.get(url, stream=True, headers=headers)
    if r.status_code != 200:
        raise Exception(url, "status code", r.status_code)
    file_size = int(r.headers.get("Content-Length"))
    bar = DownloadBar(filename, max=file_size)
    with open(storepath + '.tmp', 'wb') as f:
        chunk_length = 16 * 1024
        while 1:
            buf = r.raw.read(chunk_length)
            if not buf:
                break
            f.write(buf)
            bar.next(len(buf))
        bar.finish()
    shutil.move(storepath + '.tmp', storepath)
    return storepath
Example #21
def download(arch: str, storepath: str):
    r = requests.get(
        "https://github.com/openatx/atx-agent/releases/download/{0}/atx-agent_{0}_linux_{1}.tar.gz"
        .format(__atx_agent_version__, arch),
        stream=True)
    r.raise_for_status()
    file_size = int(r.headers.get("Content-Length"))

    bar = DownloadBar(storepath, max=file_size)
    with open(storepath + '.tmp', 'wb') as f:
        chunk_length = 16 * 1024
        while 1:
            buf = r.raw.read(chunk_length)
            if not buf:
                break
            f.write(buf)
            bar.next(len(buf))
        bar.finish()
    shutil.move(storepath + '.tmp', storepath)
Example #22
    def run(self):
        """
            Desc: run ekf
            Input(s):
                none
            Output(s):
                none
        """
        t_odom_prev = 0.0 # initialize previous odom time

        # setup progress bar
        print("running kalman filter, please wait...")
        bar = progress.bar.IncrementalBar('Progress:', max=len(self.times))

        for tt, timestep in enumerate(self.times):
            # simple predict step
            self.predict_simple()

            # predict step for odometry
            if self.odom_df['seconds of week [s]'].isin([timestep]).any():
                dt_odom = timestep - t_odom_prev
                t_odom_prev = timestep
                if not self.initialized_odom:
                    self.initialized_odom = True
                    bar.next()
                else:
                    odom_timestep = self.odom_df[self.odom_df['seconds of week [s]'] == timestep]
                    baro_meas = odom_timestep['Normalized barometer:Raw[meters]'].values[0]
                    self.update_barometer(baro_meas)

            self.mu = np.clip(self.mu,0.0,np.inf) # force to be above zero altitude

            # add values to history
            self.mu_history = np.hstack((self.mu_history,self.mu))
            self.P_history.append(np.trace(self.P))
            bar.next() # progress bar

        bar.finish() # end progress bar
        # if the history ended up one entry longer than the number of timesteps, drop the extra entry
        if len(self.times) + 1 == self.mu_history.shape[1]:
            self.mu_history = self.mu_history[:,:-1]
            self.P_history = self.P_history[:-1]
Example #23
    def fromCSV(self, filepath, shuffle=True):
        start = time.time() ### REMOVE AFTER TESTING

        with open(filepath) as fil:
            reader = csv.reader(fil)
            data = [row for row in reader]

            if shuffle:
                random.shuffle(data)

            # data = data[:1] #### REMOVE AFTER TESTING

            bar = progress.bar.Bar('Inserting CSV "{}"'.format(filepath), max=len(data))
            for row in data:
                self.insert(row)
                bar.next()
            bar.finish()
        
        end = time.time()
        elapsed = end-start
        print('ELAPSED: {}'.format(round(elapsed,3)))
Example #24
    def run(self):
        """
            Desc: run ekf
            Input(s):
                none
            Output(s):
                none
        """
        # setup progress bar
        print("running kalman filter, please wait...")
        bar = progress.bar.IncrementalBar('Progress:', max=len(self.times))

        for tt, timestep in enumerate(self.times):
            # update gnss step
            if self.sat_df['seconds of week [s]'].isin([timestep]).any():
                sat_timestep = self.sat_df[self.sat_df['seconds of week [s]']
                                           == timestep]
                pranges = sat_timestep['pr [m]'].to_numpy().reshape(-1, 1)
                sat_x = sat_timestep['sat x ECEF [m]'].to_numpy().reshape(
                    -1, 1)
                sat_y = sat_timestep['sat y ECEF [m]'].to_numpy().reshape(
                    -1, 1)
                sat_z = sat_timestep['sat z ECEF [m]'].to_numpy().reshape(
                    -1, 1)
                sigmas = sat_timestep['Pr_sigma'].to_numpy().reshape(-1, 1)
                time_correction = sat_timestep['idk wtf this is'].to_numpy(
                ).reshape(-1, 1)
                self.predict_simple()
                self.update_gnss(pranges, sat_x, sat_y, sat_z, sigmas,
                                 time_correction)

            # add values to history
            self.mu_history = np.hstack((self.mu_history, self.mu))
            self.P_history.append(np.trace(self.P))
            bar.next()  # progress bar

        bar.finish()  # end progress bar
        if len(self.times) + 1 == self.mu_history.shape[1]:
            self.mu_history = self.mu_history[:, :-1]
            self.P_history = self.P_history[:-1]
Example #25
def main():
    args = parse_arguments()

    if sys.platform == "linux":
        host = "linux-x86_64"
    elif sys.platform == "darwin":
        host = "darwin-x86_64"
    else:
        print("Unsupported platform: {}".format(sys.platform))
        sys.exit(1)

    ndk_version = "r16b"
    ndk_package_base = "android-ndk-{}".format(ndk_version)
    ndk_package_archive = "{}-{}.zip".format(ndk_package_base, host)
    ndk_url = "https://dl.google.com/android/repository/{}".format(ndk_package_archive)
    ndk_download_path = "{}/{}".format(args.download_prefix, ndk_package_archive)
    ndk_tmp_unzip = "/tmp/android-ndk-unzip"
    ndk_unzip_path = "{}/android-ndk".format(args.download_prefix)

    if has_progress_bar:
        bar = ProgressBar("Downloading NDK")
        urllib.request.urlretrieve(ndk_url, ndk_download_path, bar.urllib_reporthook)
        bar.finish()

        bar = ProgressBar("Unzipping NDK")
        unzip(ndk_download_path, ndk_tmp_unzip, bar.unzip_reporthook)
        bar.finish()
    else:
        print("Downloading NDK...")
        urllib.request.urlretrieve(ndk_url, ndk_download_path)
        print("Done.")
        print("Unzipping NDK...")
        unzip(ndk_download_path, ndk_tmp_unzip)
        print("Done.")

    print("Moving ndk to {}".format(ndk_unzip_path))
    shutil.move(ndk_tmp_unzip + "/android-ndk-r16b", ndk_unzip_path)
    print("Done.")
Example #26
    def _wait_install_finished(self, id, installing_callback):
        bar = None
        downloaded = True
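        # the loop below polls /install/<id> about once per second; the reported
        # message moves through 'downloading' -> 'installing' -> 'success installed',
        # or an 'error' field is set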

        while True:
            resp = self._reqsess.get(self.path2url('/install/' + id))
            resp.raise_for_status()
            jdata = resp.json()
            message = jdata['message']
            pg = jdata.get('progress')

            def notty_print_progress(pg):
                written = pg['copiedSize']
                total = pg['totalSize']
                print(
                    time.strftime('%H:%M:%S'), 'downloading %.1f%% [%s/%s]' %
                    (100.0 * written / total if total != 0 else 0,
                     humanize.naturalsize(written, gnu=True),
                     humanize.naturalsize(total, gnu=True)))

            if message == 'downloading':
                downloaded = False
                if pg:  # if there is a progress
                    if hasattr(sys.stdout, 'isatty'):
                        if sys.stdout.isatty():
                            if not bar:
                                bar = _ProgressBar(time.strftime('%H:%M:%S') +
                                                   ' downloading',
                                                   max=pg['totalSize'])
                            written = pg['copiedSize']
                            bar.next(written - bar.index)
                        else:
                            notty_print_progress(pg)
                    else:
                        pass
                else:
                    print(time.strftime('%H:%M:%S'), "download initialing")
            else:
                if not downloaded:
                    downloaded = True
                    if bar:  # bar only set in atty
                        bar.next(pg['copiedSize'] - bar.index) if pg else None
                        bar.finish()
                    else:
                        print(time.strftime('%H:%M:%S'), "download 100%")
                print(time.strftime('%H:%M:%S'), message)
            if message == 'installing':
                if callable(installing_callback):
                    installing_callback(self)
            if message == 'success installed':
                return jdata.get('packageName')

            if jdata.get('error'):
                raise RuntimeError("error", jdata.get('error'))

            try:
                time.sleep(1)
            except KeyboardInterrupt:
                bar.finish() if bar else None
                print("keyboard interrupt caught, cancel install id", id)
                self._reqsess.delete(self.path2url('/install/' + id))
                raise
def run(data_path, db_config, index1_table_name, index2_table_names,
        ske_config):

    start = datetime.datetime.now()
    log_manager.info_global("--------------------------------")
    log_manager.info_global(
        f"{start.strftime('[%y-%m-%d %H:%M:%S]')} START INDEXING\n")

    log_manager.info_global("Creating DB tables ...")

    create_tables(db_config, index1_table_name, index2_table_names)

    log_manager.info_global("Creating DataFrames from original CSV files ...")

    # 1. set up the keywords dataframe
    log_manager.debug_global("Creating DataFrame for keywords ...")
    keyword_df = read_keyword_df(data_path)

    # store the keywords df to the database
    log_manager.debug_global("Writing keywords DF to DB ...")
    write_df_to_db(
        keyword_df.drop(columns=['csv_tokens', 'csv_types'], inplace=False),
        index2_table_names['keywords'], db_config)

    # 2. set up the text token counts dataframe
    log_manager.debug_global("Creating DataFrame for token counts ...")
    token_df = pd.DataFrame()

    # for each keyword, we collect its per-document token counts (from the CSV file
    # referenced in keyword_df) and append them to token_df
    bar = create_progress_bar('Calculating total of tokens per text',
                              keyword_df.shape[0])

    for kw in keyword_df.itertuples():
        # kw is a Pandas object representing the row
        # we find the token counts in the CSV file stored in the column 'csv_tokens' of keyword_df
        temp_df = pd.read_csv(f'{data_path}/CSV/{kw.csv_tokens}',
                              sep='\t',
                              skiprows=8,
                              names=['docid', 'token', 'token_count'],
                              usecols=['docid', 'token_count'])
        # we need to group by doc id and sum all the token counts for various shapes of the token
        temp_df = temp_df.groupby(['docid'], as_index=False).sum()

        # add a column
        temp_df['keyword_id'] = kw.Index

        temp_df = temp_df.set_index(['keyword_id', 'docid'],
                                    verify_integrity=True)
        # 1st index: keyword_id, because this allows for fewer lookups when calculating the scores

        # we append the rows to token_df
        token_df = token_df.append(temp_df, verify_integrity=True)

        bar.next()
    bar.finish()

    # Don't write token_df to the DB yet because it has a FK constraint to doc_df.

    # 3. set up the texts dataframe
    log_manager.debug_global("Creating DataFrame for texts ...")

    # we use this file only to get a complete list of doc ids
    doc_df = pd.read_csv(f'{data_path}/mara002_kvr_all.docids.counts.csv',
                         sep='\t',
                         names=['types_count', 'docid'],
                         usecols=['docid'])
    doc_df['score_rarity_diversity'] = 0.0
    doc_df['already_annotated'] = False
    doc_df['selected_on'] = None
    doc_df = doc_df.set_index('docid')

    # Calculate scores
    log_manager.debug_global("Calculating scores for texts ...")

    doc_df = score_rarity_diversity(doc_df, keyword_df, token_df)

    # Write doc_df to DB
    log_manager.debug_global("Writing DF for texts to DB ...")

    write_df_to_db(doc_df, index2_table_names['scores'], db_config)

    # Now we can write token_df to the DB.
    log_manager.debug_global("Writing DF for tokens to DB ...")

    write_df_to_db(token_df, index2_table_names['tokens'], db_config)

    # all done!
    end = datetime.datetime.now()
    log_manager.info_global(
        f"{end.strftime('[%y-%m-%d %H:%M:%S]')} DONE INDEXING, duration: {end-start}"
    )

    return  # TODO: Is this empty return on purpose?
def score_rarity_diversity(doc_df, keyword_df, token_df):
    # This algorithm favors rare keywords over frequent keywords,
    #   and many types over many tokens,
    #   but also many tokens over few tokens.
    #
    # score(text) =
    #   sum for each keyword k:
    #     sum for n from 1 to the token count of k in text:
    #       (1/corpus token count of k) * (1/n)
    #
    # A keyword with a high token count in the corpus will yield a smaller coefficient, and vice versa,
    #   thus favoring rarity.
    # A text t1 where keyword k appears n times will have a lower score
    #   than a text t2 where k appears n+1 times, if t1 and t2 are otherwise identical,
    #   thus favoring higher token counts.
    # A text t1 where keyword k1 appears n times and keyword k2 appears m times,
    #   where k1 and k2 have the same corpus token count, will have a higher score
    #   than a text t2 where k1 appears n+l times and k2 appears m-l times,
    #   thus favoring diversity.
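    #
    # Worked example (illustrative numbers only): a keyword with a corpus token
    # count of 100 that appears 3 times in a text contributes
    #   (1/100)*(1/1 + 1/2 + 1/3) ≈ 0.0183
    # to that text's score.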

    log_manager.debug_global("Calculating rarity/diversity scores ...")

    # We select the column 'score_rarity_diversity', which as of now contains only 0s.
    # This returns a Series object whose index is the docids (the index of doc_df).
    scores = doc_df['score_rarity_diversity']

    bar = create_progress_bar('Computing scores per keyword',
                              keyword_df.shape[0])

    # iterate over rows in keyword_df
    for kw, data in keyword_df.iterrows():
        # kw is the label of the row (the keyword_id)
        # data is a Series of the values in this row

        # get this keyword's corpus token count
        # we will use this to calculate its inverse frequency
        kw_freq = data.corpus_count

        # get this keyword's token count per text
        try:
            # token_df has a MultiIndex: 1st the keyword_id, 2nd the docid
            # We select all rows with keyword_id = kw. This returns a DataFrame.
            # Then we select only the column 'token_count'. This returns a Series.
            tokencounts = token_df.loc[kw]['token_count']
            # tokencounts is a Series, indexed with docid,
            #   containing as values the token counts of kw in the given docid

        except KeyError as e:
            tokencounts = pd.Series(index=doc_df.index, data=0)

        # This is the formula:
        def calculate_score(token_count, kw_freq):
            return sum(
                map(lambda x: pow(kw_freq, -1) * pow(x, -1),
                    range(1,
                          int(token_count) + 1)))

        # Apply this function to the token counts of the current keyword.
        scores = scores.add(tokencounts.apply(calculate_score,
                                              args=(kw_freq, )),
                            fill_value=0.0)

        bar.next()

    bar.finish()

    # feed the temporary Series back into the table
    doc_df['score_rarity_diversity'] = scores

    # sort by highest score
    doc_df = doc_df.sort_values(by='score_rarity_diversity', ascending=False)

    return doc_df
def test(opts,
         model,
         test_data,
         which_epoch='best',
         batch_size=1,
         expdir=None,
         save_loss=False,
         save_images=True):
    test_loader = DataLoader(test_data,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=opts.dataloader_workers,
                             pin_memory=True)

    model.load_checkpoint(which_epoch)
    model.set_mode('eval')

    output_dir = os.path.join(
        opts.results_dir, opts.experiment_name if expdir is None else expdir,
        'test_{}'.format(which_epoch))
    os.makedirs(output_dir, exist_ok=True)

    test_start = time.perf_counter()

    test_loss = None

    bar = progress.bar.Bar('Test', max=len(test_loader))
    for idx, data in enumerate(test_loader):
        model.set_data(data)
        model.test(compute_loss=save_loss)

        if save_loss:
            if test_loss is None:
                test_loss = model.get_data()
            else:
                test_loss = utils.concatenate_dicts(test_loss,
                                                    model.get_data())

        if save_images:
            output = model.get_images()

            for img_label, img in output.items():
                this_output_dir = os.path.join(output_dir, img_label)
                os.makedirs(this_output_dir, exist_ok=True)

                output_file = os.path.join(this_output_dir,
                                           '{:05}.png'.format(idx))
                # print("Saving to {}".format(output_file))
                img.save(output_file)

        bar.next()
    bar.finish()

    test_end = time.perf_counter()
    test_fps = len(test_data) / (test_end - test_start)
    print('Processed {} images | time: {:.3f} s | test: {:.3f} fps'.format(
        len(test_data), test_end - test_start, test_fps))

    if save_loss:
        loss_file = os.path.join(output_dir, 'loss.csv')
        header = [key for key in test_loss]
        entries = [test_loss[key] for key in test_loss]
        entries = np.atleast_2d(np.array(entries)).T.tolist()

        print("Saving test loss to {}".format(loss_file))
        with open(loss_file, 'wt') as file:
            file.write(','.join(header) + '\n')
            for entry in entries:
                line = ','.join([str(val) for val in entry]) + '\n'
                file.write(line)
def train(opts,
          model,
          train_data,
          val_data,
          num_epochs,
          resume_from_epoch=None):
    train_loader = DataLoader(train_data,
                              batch_size=opts.batch_size,
                              shuffle=True,
                              num_workers=opts.dataloader_workers,
                              pin_memory=True)
    val_loader = DataLoader(val_data,
                            batch_size=opts.batch_size,
                            shuffle=False,
                            num_workers=opts.dataloader_workers,
                            pin_memory=True)

    print('Training images: {}'.format(len(train_data)))
    print('Validation images: {}'.format(len(val_data)))

    log_dir = os.path.join(opts.results_dir, opts.experiment_name)
    writer = SummaryWriter(log_dir)

    ### LOAD FROM CHECKPOINT ###
    if resume_from_epoch is not None:
        try:
            initial_epoch = model.load_checkpoint(resume_from_epoch) + 1
            iterations = (initial_epoch - 1) * opts.batch_size
        except FileNotFoundError:
            print('No model available for epoch {}, starting fresh'.format(
                resume_from_epoch))
            initial_epoch = 1
            iterations = 0
    else:
        initial_epoch = 1
        iterations = 0

    ### TRAIN AND VALIDATE ###
    best_total_val_loss = 1e12

    for epoch in range(initial_epoch, num_epochs + 1):
        epoch_start = time.perf_counter()

        # TRAIN
        epoch_train_loss = None
        model.set_mode('train')

        bar = progress.bar.Bar('Epoch {} train'.format(epoch),
                               max=len(train_loader))
        for data in train_loader:
            model.set_data(data)
            model.optimize()
            if epoch_train_loss is None:
                epoch_train_loss = model.get_errors()
            else:
                epoch_train_loss = utils.concatenate_dicts(
                    epoch_train_loss, model.get_errors())

            iterations += 1
            bar.next()
        bar.finish()

        # VISUALIZE
        for label, image in model.get_images().items():
            image = np.array(image).transpose([2, 0, 1])
            writer.add_image('train/' + label, image, epoch)

        train_end = time.perf_counter()

        # VALIDATE
        epoch_val_loss = None
        model.set_mode('eval')

        bar = progress.bar.Bar('Epoch {} val  '.format(epoch),
                               max=len(val_loader))
        for data in val_loader:
            model.set_data(data)
            model.test(compute_loss=True)
            if epoch_val_loss is None:
                epoch_val_loss = model.get_errors()
            else:
                epoch_val_loss = utils.concatenate_dicts(
                    epoch_val_loss, model.get_errors())

            bar.next()
        bar.finish()

        for label, image in model.get_images().items():
            image = np.array(image).transpose([2, 0, 1])
            writer.add_image('val/' + label, image, epoch)

        epoch_end = time.perf_counter()

        epoch_avg_val_loss = utils.compute_dict_avg(epoch_val_loss)
        epoch_avg_train_loss = utils.compute_dict_avg(epoch_train_loss)
        train_fps = len(train_data) / (train_end - epoch_start)
        val_fps = len(val_data) / (epoch_end - train_end)

        print(
            'End of epoch {}/{} | iter: {} | time: {:.3f} s | train: {:.3f} fps | val: {:.3f} fps'
            .format(epoch, num_epochs, iterations, epoch_end - epoch_start,
                    train_fps, val_fps))

        # LOG ERRORS
        errors = utils.tag_dict_keys(epoch_avg_train_loss, 'train')
        errors.update(utils.tag_dict_keys(epoch_avg_val_loss, 'val'))
        for key, value in sorted(errors.items()):
            writer.add_scalar(key, value, epoch)
            print('{:20}: {:.3e}'.format(key, value))

        writer.add_scalar('fps/train', train_fps, epoch)
        writer.add_scalar('fps/val', val_fps, epoch)

        # SAVE MODELS
        model.save_checkpoint(epoch, 'latest')

        if epoch % opts.checkpoint_interval == 0:
            model.save_checkpoint(epoch, epoch)

        curr_total_val_loss = 0
        for key, val in epoch_avg_val_loss.items():
            if 'eval_loss' in key:
                try:
                    curr_total_val_loss += val[-1]
                except IndexError:
                    curr_total_val_loss += val

        if epoch == 1 or curr_total_val_loss < best_total_val_loss:
            model.save_checkpoint(epoch, 'best')
            best_total_val_loss = curr_total_val_loss