def get(self, filepath_id):
    fid = int(filepath_id)

    if not validate_filepath_access_by_user(self.current_user, fid):
        raise HTTPError(
            403, "%s doesn't have access to "
            "filepath_id: %s" % (self.current_user.email, str(fid)))

    relpath = filepath_id_to_rel_path(fid)
    fp_info = get_filepath_information(fid)
    fname = basename(relpath)

    if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
        # This is a directory, we need to list all the files so NGINX
        # can download all of them
        to_download = self._list_dir_files_nginx(fp_info['fullpath'])
        self._write_nginx_file_list(to_download)
        fname = '%s.zip' % fname
    else:
        self._write_nginx_placeholder_file(relpath)
        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header('Content-Transfer-Encoding', 'binary')
        self.set_header('X-Accel-Redirect', '/protected/' + relpath)

    # prefix the downloaded file name with the owning object id, if any
    aid = filepath_id_to_object_id(fid)
    if aid is not None:
        fname = '%d_%s' % (aid, fname)

    self._set_nginx_headers(fname)
    self.finish()
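# A hedged sketch of the directory branch above: assuming the "file list" the
# handler writes is an nginx mod_zip manifest, each line has the form
# "<crc32-or-dash> <size> <url-escaped location> <name inside the zip>" and
# the response carries an "X-Archive-Files: zip" header so nginx streams the
# zip itself. The helper name, the walk-based listing and the '/protected/'
# prefix are illustrative assumptions, not the project's actual helper.
from os import walk
from os.path import getsize, join, relpath
from urllib.parse import quote


def list_dir_files_for_mod_zip(dirpath, url_prefix='/protected/'):
    lines = []
    for root, _, files in walk(dirpath):
        for name in files:
            fullpath = join(root, name)
            arcname = relpath(fullpath, dirpath)
            # '-' means the CRC-32 is not precomputed for this entry
            lines.append('- %d %s%s %s' % (getsize(fullpath), url_prefix,
                                           quote(fullpath.lstrip('/')),
                                           arcname))
    return '\n'.join(lines)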
# imports this snippet relies on; the qiita_db module paths are assumed
from os.path import abspath, dirname, exists, getsize, join

import pandas as pd

from qiita_db.sql_connection import TRN
from qiita_db.util import compute_checksum, get_filepath_information

with TRN:
    sql = """SELECT filepath_id
             FROM qiita.filepath"""
    TRN.add(sql)
    fids = TRN.execute_fetchflatten()

fpath = join(dirname(abspath(__file__)), 'support_files', 'patches',
             'python_patches', '74.py.cache.tsv')
cache = dict()
if exists(fpath):
    df = pd.read_csv(fpath, sep='\t', index_col=0, dtype=str,
                     names=['filepath_id', 'checksum', 'fp_size'])
    cache = df.to_dict('index')

for fid in fids:
    if fid not in cache:
        finfo = get_filepath_information(fid)
        try:
            size = getsize(finfo['fullpath'])
        except FileNotFoundError:
            size = 0
        try:
            checksum = compute_checksum(finfo['fullpath'])
        except FileNotFoundError:
            checksum = ''
    else:
        checksum = cache[fid]['checksum']
        size = cache[fid]['fp_size']

    with TRN:
        sql = """UPDATE qiita.filepath
                 SET fp_size = %s, checksum = %s
                 WHERE filepath_id = %s"""
        TRN.add(sql, [size, checksum, fid])
        TRN.execute()
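# A minimal sketch, assuming the cache file is meant to let interrupted runs
# resume: append one "filepath_id<tab>checksum<tab>fp_size" row per processed
# file so the pd.read_csv() above picks it up on the next run. The helper
# name is hypothetical.
def append_to_cache(cache_fpath, filepath_id, checksum, fp_size):
    with open(cache_fpath, 'a') as f:
        f.write('%s\t%s\t%s\n' % (filepath_id, checksum, fp_size))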
# per-file worker body: returns (filepath_id, checksum, size), or
# (finfo, None, None) when the file cannot be read
try:
    size = getsize(finfo['fullpath'])
except (FileNotFoundError, PermissionError):
    return finfo, None, None
checksum = compute_checksum(finfo['fullpath'])
return finfo['filepath_id'], checksum, size


# get all filepaths and their filepath information; takes ~10 min
with TRN:
    TRN.add("SELECT filepath_id FROM qiita.filepath")
    files = []
    for fid in TRN.execute_fetchflatten():
        files.append(get_filepath_information(fid))

# just get the filepath ids that haven't been processed; the format of this
# file is filepath_id[tab]checksum[tab]filesize
fpath = join(dirname(abspath(__file__)), '74.py.cache.tsv')
processed = []
if exists(fpath):
    with open(fpath, 'r') as f:
        processed = [int(line.split('\t')[0])
                     for line in f.read().split('\n') if line != '']
files_curr = [f for f in files if f['filepath_id'] not in processed]

# let's use 20 processors and, in each iteration, 120 files
fids = 120
processors = 20
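# A hedged sketch of how the batching set up above (20 processes, 120 files
# per iteration) could be driven with multiprocessing.Pool; the function and
# parameter names are assumptions. `worker` would be the per-file routine
# whose body appears at the top of this snippet, and `flush` would persist a
# finished batch (e.g. to 74.py.cache.tsv) so progress survives a crash.
from multiprocessing import Pool


def process_in_batches(files_curr, worker, flush, per_batch=120,
                       processors=20):
    with Pool(processors) as pool:
        for start in range(0, len(files_curr), per_batch):
            batch = files_curr[start:start + per_batch]
            results = pool.map(worker, batch)
            flush(results)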