def object_file_path(obj, base_path, folder_name, filename, create_dir=False):
    """Build the storage path of a file that belongs to ``obj``.

    The directory is
    ``<base_path>/<obj.__tablename__>/<folder_name>/<obj.client_id>/<obj.object_id>``.

    :param obj: object exposing ``__tablename__``, ``client_id`` and ``object_id``
    :param base_path: root directory of the storage area
    :param folder_name: sub-folder placed directly below the table name
    :param filename: requested file name; it is run through ``sanitize_filename``
    :param create_dir: when true, create the directory if it does not exist yet
    :return: tuple ``(storage_path, sanitized_filename)``
    """
    safe_name = sanitize_filename(filename)
    path_parts = (
        base_path,
        obj.__tablename__,
        folder_name,
        str(obj.client_id),
        str(obj.object_id),
    )
    target_dir = os.path.join(*path_parts)
    if create_dir:
        os.makedirs(target_dir, exist_ok=True)
    return os.path.join(target_dir, safe_name), safe_name
def safename(fn):
    """Return a cross-platform safe file name WITHOUT any directory part.

    A ``Path`` argument is first reduced to its final component; the name is
    then passed to ``pathvalidate.sanitize_filename`` with ``'-'`` as the
    replacement text.
    """
    name = fn.name if isinstance(fn, Path) else fn
    return pathvalidate.sanitize_filename(name, '-')
def test_normal_pathlike(self, value, replace_text, expected):
    """Sanitizing a path-like value gives the expected, still path-like, valid name."""
    result = sanitize_filename(value, replace_text)

    assert result == expected
    assert is_pathlike_obj(result)

    # The sanitized name must also pass both validation helpers.
    validate_filename(result)
    assert is_valid_filename(result)
def writeplots(fg, plotprefix, tind=None, odir=None, fmt='.png', anno=None, dpi=None, facecolor=None, doclose=True):
    """Save figure ``fg`` into ``odir`` as ``<plotprefix><nametime(tind)><fmt>``.

    Does nothing when ``fg`` or ``odir`` is None. Any exception raised while
    drawing or saving is logged through ``logging.error`` instead of being
    propagated.

    :param fg: figure to save
    :param plotprefix: leading part of the output file name
    :param tind: value handed to ``nametime`` to build the file name suffix
    :param odir: output directory (``~`` is expanded)
    :param fmt: file extension including the dot
    :param anno: optional annotation text drawn onto the figure before saving
    :param dpi: forwarded to ``savefig``
    :param facecolor: forwarded to ``savefig``; defaults to the figure's own face color
    :param doclose: close the figure after saving
    """
    try:
        if fg is None or odir is None:
            return

        draw()  # Must have this here or plot doesn't update in animation multiplot mode!
        # Format notes from the author:
        #   TIF was not faster and was 100 times the file size!
        #   PGF is slow and big file,
        #   RAW crashes
        #   JPG no faster than PNG
        suffix = nametime(tind)

        if anno:
            fg.text(0.15, 0.8, anno, fontsize='x-large')

        outdir = Path(odir).expanduser()
        name = plotprefix + suffix + fmt
        if pathvalidate is not None:
            # pathvalidate is optional; sanitize only when it is available.
            name = pathvalidate.sanitize_filename(name)
        cn = outdir / name

        print('write', cn)

        if facecolor is None:
            facecolor = fg.get_facecolor()

        fg.savefig(cn, bbox_inches='tight', dpi=dpi, facecolor=facecolor, edgecolor='none')

        if doclose:
            close(fg)
    except Exception as e:
        logging.error(f'{e} when plotting {plotprefix}')
def object_file_path(obj, settings, data_type, filename, create_dir=False):
    """Build the storage path of a file that belongs to ``obj``.

    The storage root is read from ``settings['storage']['path']`` and the
    directory is
    ``<root>/<obj.__tablename__>/<data_type>/<obj.client_id>/<obj.object_id>``.

    :param obj: object exposing ``__tablename__``, ``client_id`` and ``object_id``
    :param settings: mapping holding the storage root under ``['storage']['path']``
    :param data_type: sub-folder placed directly below the table name
    :param filename: requested file name; it is run through ``sanitize_filename``
    :param create_dir: when true, create the directory if it does not exist yet
    :return: tuple ``(storage_path, sanitized_filename)``
    """
    safe_name = sanitize_filename(filename)
    root = settings['storage']['path']
    target_dir = os.path.join(
        root,
        obj.__tablename__,
        data_type,
        str(obj.client_id),
        str(obj.object_id),
    )
    if create_dir:
        os.makedirs(target_dir, exist_ok=True)
    return os.path.join(target_dir, safe_name), safe_name
def command_to_filename(command, suffix=""):
    """Derive a file name from a command string.

    Spaces become underscores, hyphens are removed, leading/trailing path
    separators are dropped and the remaining separators become hyphens before
    the result is passed to ``pathvalidate.sanitize_filename``.

    :param command: command line text
    :param suffix: optional text appended as ``_<suffix>``
    :return: the derived file name
    """
    separators = "/\\"

    name = command.strip().replace(" ", "_").replace("-", "")
    # Trim separators at both ends, then replace the inner ones.
    name = name.strip(separators)
    name = re.sub("[%s]" % re.escape(separators), "-", name)
    name = pathvalidate.sanitize_filename(name)

    if dataproperty.is_not_empty_string(suffix):
        name = name + "_" + suffix

    return name
def _preprocess_table_name(self):
    """Return a cleaned-up version of ``self._tabledata.table_name``.

    The name is sanitized as a file name, stripped of unprintable characters,
    has symbols and spaces replaced by ``_``, runs of ``_`` collapsed to one
    and leading/trailing ``_`` removed.

    :raises NameValidationError: if sanitizing raises ``TypeError``
    """
    raw_name = self._tabledata.table_name

    try:
        name = pv.sanitize_filename(raw_name, replacement_text="_")
    except TypeError:
        raise NameValidationError(
            "table name must be a string: actual='{}'".format(raw_name)
        )

    name = pv.replace_unprintable_char(name, replacement_text="")
    name = pv.replace_symbol(name, replacement_text="_")
    # Spaces count as separators too; collapse every run of underscores.
    name = re.sub("_+", "_", name.replace(" ", "_"))

    return name.strip("_")
def writeplots(fg, t="", odir=None, ctxt="", ext=".png"):
    """Save figure ``fg`` into ``odir`` and close it; do nothing without ``odir``.

    The file name is ``<ctxt><str(t) minus its last 6 characters><ext>``,
    sanitized with ``-`` as replacement text and with spaces removed.

    :param fg: figure to save
    :param t: time label; a ``DataArray``, float or integer is converted to a
        UTC ``datetime`` after dividing the value by 1e9
    :param odir: output directory (``~`` is expanded, created if missing)
    :param ctxt: leading text of the file name
    :param ext: file extension including the dot
    """
    from matplotlib.pyplot import close

    if not odir:
        return

    outdir = Path(odir).expanduser()
    outdir.mkdir(parents=True, exist_ok=True)

    if isinstance(t, (DataArray)):
        t = datetime.fromtimestamp(t.item() / 1e9, tz=UTC)
    elif isinstance(t, (float, integer_types)):
        # UTC assume
        t = datetime.fromtimestamp(t / 1e9, tz=UTC)

    # [:-6] keeps up to millisecond if present.
    stem = ctxt + str(t)[:-6] + ext
    safe_stem = pathvalidate.sanitize_filename(stem, "-").replace(" ", "")
    ppth = outdir / safe_stem

    print("saving {}".format(ppth))

    fg.savefig(str(ppth), dpi=100, bbox_inches="tight")
    close(fg)
def create_filepath(course, path):
    """Return ``<course name>/<module title>/...`` with every part sanitized.

    :param course: mapping whose ``["OrgUnit"]["Name"]`` holds the course name;
        matches of ``course_prefix`` are removed from it
    :param path: iterable of modules, each a mapping with a ``"Title"`` entry
    :return: the parts joined with ``/``
    """
    stripped_name = course_prefix.sub("", course["OrgUnit"]["Name"])

    segments = [sanitize_filename(stripped_name)]
    segments.extend(sanitize_filename(module["Title"]) for module in path)

    return "/".join(segments)
def test_normal_multibyte(self, value, replace_text, expected):
    """Sanitizing a multi-byte value gives the expected name, which validates."""
    result = sanitize_filename(value, replace_text)

    assert result == expected

    # The sanitized name must also pass both validation helpers.
    validate_filename(result)
    assert is_valid_filename(result)
def download_id_by_type(client, item_id, path, quality, album=False, embed_art=False, albums_only=False):
    """
    Download and get metadata by ID and type (album or track)

    :param Qopy client: qopy Client
    :param int item_id: Qobuz item id
    :param str path: The root directory where the item will be downloaded
    :param int quality: Audio quality (5, 6, 7, 27)
    :param bool album: album type or not
    :param bool embed_art: Embed cover art into files
    :param bool albums_only: Ignore Singles, EPs and VA releases
    """
    count = 0

    if album:
        meta = client.get_album_meta(item_id)

        if albums_only and (
            meta.get("release_type") != "album"
            or meta.get("artist").get("name") == "Various Artists"
        ):
            print("Ignoring Single/EP/VA: " + meta.get("title", ""))
            return

        album_title = get_title(meta)
        print("\nDownloading: {}\n".format(album_title))
        dirT = (
            meta["artist"]["name"],
            album_title,
            meta["release_date_original"].split("-")[0],
            get_format(client, meta, quality),
        )
        sanitized_title = sanitize_filename("{} - {} [{}] [{}]".format(*dirT))
        dirn = os.path.join(path, sanitized_title)
        os.makedirs(dirn, exist_ok=True)
        get_extra(meta["image"]["large"], dirn)
        if "goodies" in meta:
            try:
                get_extra(meta["goodies"][0]["url"], dirn, "booklet.pdf")
            except Exception as e:
                # Best effort: a booklet failure must not abort the album.
                # Format the exception; "Error: " + e would raise TypeError
                # because str and Exception cannot be concatenated.
                print("Error: {}".format(e))

        # More than one distinct media number means a multi-disc release.
        media_numbers = {track["media_number"] for track in meta["tracks"]["items"]}
        is_multiple = len(media_numbers) > 1

        for i in meta["tracks"]["items"]:
            parse = client.get_track_url(i["id"], quality)
            if "sample" not in parse and parse["sampling_rate"]:
                is_mp3 = int(quality) == 5
                download_and_tag(
                    dirn,
                    count,
                    parse,
                    i,
                    meta,
                    False,
                    is_mp3,
                    embed_art,
                    i["media_number"] if is_multiple else None,
                )
            else:
                print("Demo. Skipping")
            count = count + 1
    else:
        parse = client.get_track_url(item_id, quality)

        if "sample" not in parse and parse["sampling_rate"]:
            meta = client.get_track_meta(item_id)
            track_title = get_title(meta)
            print("\nDownloading: {}\n".format(track_title))
            dirT = (
                meta["album"]["artist"]["name"],
                track_title,
                meta["album"]["release_date_original"].split("-")[0],
                get_format(client, meta, quality, True),
            )
            sanitized_title = sanitize_filename("{} - {} [{}] [{}]".format(*dirT))
            dirn = os.path.join(path, sanitized_title)
            os.makedirs(dirn, exist_ok=True)
            get_extra(meta["album"]["image"]["large"], dirn)
            is_mp3 = int(quality) == 5
            download_and_tag(dirn, count, parse, meta, meta, True, is_mp3, embed_art)
        else:
            print("Demo. Skipping")
    print("\nCompleted\n")
def ta_upload():
    """Handle the upload endpoint: authenticate, store the image, mark faces, push to S3.

    On GET the upload form template is rendered. Otherwise the request must
    carry ``username``, ``password`` and a ``file``; after the credential checks
    the image is saved, face rectangles are drawn on a copy, a database row is
    added and both images are uploaded to S3 and removed locally.

    Responses: 201 on success, 401 for every validation/authentication failure
    and for a disallowed file, 500 when the final database/S3 step raises.
    """
    # Accept only names that have an extension listed in ALLOWED_EXTENSIONS.
    def allowed_file(filename):
        return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

    # A GET request only renders the basic HTML page that showcases the upload API.
    if request.method == 'GET':
        return render_template('users/ta_upload.html')

    # Validation checks for the API (the main app does these with wtforms).
    if 'file' not in request.files:
        error_msg = "Did you forget the file? "
        return Response(error_msg, status=401)

    username = request.form.get('username')
    password = request.form.get('password')
    file = request.files.get('file')

    # NOTE(review): `== None` works here, but `is None` is the idiomatic test.
    if username == None or password == None or file == None:
        error_msg = "Welcome - Please input credentials and file"
        return Response(error_msg, status=401)

    if len(username) <2 or len(username) > 30 or len(password) <2 or len(password) > 30:
        error_msg = "Username or Password do not meet the length requirements"
        return Response(error_msg, status=401)

    user = User.query.filter_by(username=username).first()
    if user is None:
        error_msg = "User Not Registered!!"
        return Response(error_msg, status=401)

    if not user.check_password(password):
        error_msg = "Invalid username and password combination"
        return Response(error_msg, status=401)

    if file and allowed_file(file.filename):
        # First pass: checks for spaces and security flaws and fixes them.
        filename = secure_filename(file.filename)
        # Second pass as insurance: URL-decode, sanitize, replace spaces.
        clean_name = sanitize_filename(urllib.parse.unquote(filename)).replace(" ", "_")
        # Random 8-hex-digit prefix in front of the cleaned name.
        rand_name = urandom(4).hex() + clean_name

        # Save the original image; keep only the base name of the stored path.
        real_name = ntpath.basename(image_uploadset.save(file, folder='originals', name=rand_name))
        print("rand_name",rand_name)
        print("real_name",real_name)
        original_path = './images/originals/'+real_name
        image_cv = cv2.imread(original_path)

        # Face detection.
        # NOTE(review): the classifier location is hard-coded to one machine's
        # virtualenv; earlier revisions kept other developers' absolute paths
        # here as commented-out alternatives.
        face_cascade = cv2.CascadeClassifier('/home/ubuntu/Desktop/ece1779/A2_ECE1779/venv/lib/python3.7/site-packages/cv2/data/haarcascade_frontalface_default.xml')

        gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        # Start from a copy so there is still an image to write when no face is found.
        face_cv = image_cv.copy()
        for (x,y,w,h) in faces:
            # Draw a rectangle around each detected face.
            face_cv = cv2.rectangle(image_cv, (x,y) , (x + w,y + h), (0,0,255),8)
            # NOTE(review): roi_gray / roi_color are assigned but not used below.
            roi_gray = gray[y:y+h, x:x+w]
            roi_color = face_cv[y:y+h, x:x+w]

        picture_path = './images/faces/'+real_name
        cv2.imwrite(picture_path, face_cv)
        # NOTE(review): waitKey(0) in a request handler looks unintended — confirm.
        cv2.waitKey(0)

        # An earlier, commented-out variant of the block below only wrote the
        # database row and returned "File Upload Success" without the S3 upload.
        try:
            image = Image(user_id=user.id, file_name=real_name, num_faces = len(faces))
            db.session.add(image)
            db.session.commit()

            # Upload the original and the annotated copy (prefixed 'f_'), then
            # delete the local files.
            s3.upload_file(original_path,webapp.config["S3_BUCKET"],real_name)
            s3.upload_file(picture_path,webapp.config["S3_BUCKET"],'f_'+real_name)
            os.remove(original_path)
            os.remove(picture_path)

            success_msg = "Upload Success"
            return Response(success_msg, status=201)
        except Exception as error:
            # NOTE(review): this also catches S3 and file-removal failures that
            # happen after the commit, yet still reports a DB error.
            db.session.rollback()
            return Response("DB Error, rollback", status=500)
    else:
        error_msg = "Not an Image"
        return Response(error_msg, status=401)
def get_comment_file():
    """Return ``<COMMENTSDIR>/<sanitized current show name>.json``."""
    show_name = pathvalidate.sanitize_filename(get_current_showname())
    return "%s/%s.json" % (COMMENTSDIR, show_name)
def download_from_ism(self, url, output_name, output_format):
    """Download the text streams listed in an ISM manifest and save them as subtitles.

    Fetches ``{url}/manifest`` and, for every ``StreamIndex`` whose ``@Type``
    is ``'text'``, downloads all segments, converts each with
    ``self.ismt_to_ttml``, rewrites cue begin/end times to seconds including the
    segment offset, merges the cues and writes one file per stream into
    ``self.args.output_dir``.

    Exits the process if a stream's FourCC is not ``TTML``. Returns early
    (without writing the current stream) if a cue ends before it begins.

    :param url: base URL of the stream; ``/manifest`` and segment paths are appended
    :param output_name: base of the output file name; spaces are replaced by dots
    :param output_format: ``'ttml'`` or ``'srt'``; also used as the file extension
    """
    r = self.session.get(f'{url}/manifest')
    manifest = xmltodict.parse(r.content, force_list={'StreamIndex', 'c'})
    self.logger.debug(json.dumps(manifest, indent=4))
    streams = [x for x in manifest['SmoothStreamingMedia']['StreamIndex'] if x['@Type'] == 'text']

    has_subtitles = False

    for (index, stream) in enumerate(streams):
        has_subtitles = True
        lang = stream['@Language'].lower()
        fmt = stream['QualityLevel']['@FourCC'].upper()
        if fmt != 'TTML':
            self.logger.error(f'Stream has unsupported subtitle format: {fmt!r}')
            sys.exit(1)

        # Track numbers in file names and log messages are 1-based.
        index += 1
        output = f'{output_name.replace(" ", ".")}.{lang}.{index}.{output_format}'
        output = pathvalidate.sanitize_filename(output)
        output = os.path.join(self.args.output_dir, output)

        self.logger.info(f'Downloading subtitle track #{index} ({lang})')

        path = stream['@Url'].replace('{bitrate}', stream['QualityLevel']['@Bitrate'])

        # Build the list of segment start times from the manifest's <c> entries.
        t = 0
        ts = []
        for c in stream['c']:
            if c.get('@t'):
                t = int(c['@t'])
                ts.append(t)
            if not c.get('@d'):
                # Stream only has a single segment
                break
            # NOTE(review): if '@r' is present, xmltodict presumably yields it
            # as a string, which range() would reject — confirm.
            for i in range(c.get('@r', 1)):
                t += int(c['@d'])
                ts.append(t)
        ts = ts[:-1]  # Remove nonexistent last segment

        # Accumulator for the cues of all segments of this stream.
        xml = {'tt': {'body': {'div': {'p': []}}}}

        for t in tqdm(ts, unit='seg', disable=self.args.debug):
            seg_url = f'{url}/{path.replace("{start time}", str(t))}'
            seg = self.session.get(seg_url).content
            if not seg:
                # Empty segment
                continue

            data = self.ismt_to_ttml(seg).decode('utf-8')

            # <br> tags are swapped for a placeholder before parsing and
            # restored after unparsing further down.
            assert '{{BR}}' not in data, 'input data contains br placeholder'
            data = re.sub(r'(?i)<br\s*/?>\s*(</br>)?', '{{BR}}', data)

            xml_seg = xmltodict.parse(
                data,
                force_list={'p'},
                process_namespaces=True,
                namespaces={
                    'http://www.w3.org/XML/1998/namespace': None,
                    'http://www.w3.org/2006/10/ttaf1': None,
                    'http://www.w3.org/2006/10/ttaf1#metadata': None,
                    'http://www.w3.org/2006/10/ttaf1#styling': None,
                    'http://www.w3.org/ns/ttml': None,
                },
            )

            # NOTE(review): `i` is the leftover variable of the range() loop
            # above, not an index of the current segment — confirm intent.
            if i == 0:
                fps_base = xml_seg['tt'].get('@ttp:frameRate')
                fps_mult = xml_seg['tt'].get('@ttp:frameRateMultiplier')

                if xml_seg['tt']['body']['div'] is None:
                    xml_seg['tt']['body']['div'] = {'p': []}

                if fps_base:
                    if fps_mult:
                        mult = [int(x) for x in fps_mult.split(' ')]
                        mult = truediv(*mult)
                    else:
                        mult = 1

                    # NOTE(review): `mult` computed above is not used here;
                    # the product uses the raw `fps_mult` value — confirm.
                    fps = fps_base * fps_mult
                else:
                    fps = 30  # Per TTML spec

            div = xml_seg['tt']['body']['div']
            if div is None:
                # Empty subtitle file
                continue

            subs = div.get('p', [])

            # Segment start expressed in seconds.
            scale = int(stream['@TimeScale'])
            offset = t / scale

            for p in subs:
                for a in ('@begin', '@end'):
                    tc = p[a]
                    if '.' in tc:
                        # h:m:s with fractional seconds, no frame field
                        (h, m, s) = [float(x) for x in tc.split(':')]
                        f = 0
                    else:
                        # h:m:s:frames
                        (h, m, s, f) = [int(x) for x in tc.split(':')]
                    total = round(h*3600 + m*60 + s + f/fps + offset, 3)
                    p[a] = f'{total}s'

                # Strip the trailing 's' written above to compare numerically.
                begin = float(p['@begin'][:-1])
                end = float(p['@end'][:-1])

                if end < begin:
                    self.logger.error(
                        f'End time is earlier than start time ({end} < {begin})',
                    )
                    return

            xml['tt']['body']['div']['p'].extend(subs)

        xml_data = xmltodict.unparse(xml, pretty=True)
        xml_data = xml_data.replace('{{BR}}', '<br />')

        os.makedirs(self.args.output_dir, exist_ok=True)

        self.logger.info(f'Converting and saving to {output}')

        # Nothing is written for an output_format other than 'ttml' or 'srt'.
        with open(output, 'wb') as fd:
            if output_format == 'ttml':
                fd.write(xml_data.encode('utf-8-sig'))
            elif output_format == 'srt':
                r = pycaption.DFXPReader().read(xml_data)
                w = pycaption.SRTWriter().write(r)
                fd.write(w.encode('utf-8-sig'))

    if not has_subtitles:
        self.logger.info('No subtitles available')
def test_normal_null_values(self, value, expected):
    """Sanitizing a null-like ``value`` returns ``expected``."""
    sanitized = sanitize_filename(value)
    assert sanitized == expected
def sanitize_name(name: str) -> str:
    """Return ``name`` sanitized as a file name, or ``"BLANK"`` if nothing is left.

    :param name: raw name
    :return: the sanitized name, or the literal ``"BLANK"`` when sanitizing
        produced an empty string
    """
    new_name = sanitize_filename(name)
    # Compare by value. `is ""` tests object identity, which only works by
    # accident of string interning and triggers a SyntaxWarning on Python 3.8+.
    return "BLANK" if new_name == "" else new_name
def write_out_html(subject, folder_name, body):
    """Write ``body`` to an ``.html`` file named after ``subject`` in ``folder_name``.

    The file name is the sanitized subject truncated to 50 characters plus
    ``.html``; the joined path is passed through ``enumerate_file_path``
    (presumably to avoid overwriting an existing file — confirm against its
    definition) before writing.

    :param subject: text the file name is derived from
    :param folder_name: directory the file is written to
    :param body: text content of the file
    """
    subject = sanitize_filename(subject)
    filename = f"{subject[:50]}.html"
    file_path = enumerate_file_path(os.path.join(folder_name, filename))
    # Context manager: the handle is flushed and closed even if write() fails,
    # instead of being left to the garbage collector.
    with open(file_path, "w") as html_file:
        html_file.write(body)
SAVE_LOCATION = "/Users/Andrew/Desktop/Oxford_emails/"
SAVE_FILES = True
FOLDER_LIMIT = None

# Login details are read from credentials.json next to the script.
with open("credentials.json", "r") as read_file:
    credentials = json.load(read_file)

username = credentials["username"]
password = credentials["password"]
server = credentials["server_name"]

with imap_tools.MailBox(server).login(username, password) as mailbox:
    # One output folder per mailbox folder.
    for folder in mailbox.folder.list():
        mailbox_name = folder["name"]
        mailbox.folder.set(mailbox_name)
        mailbox_folder = make_folder_if_absent(SAVE_LOCATION, sanitize_filename(mailbox_name))

        for i, msg in enumerate(mailbox.fetch(reverse=False, mark_seen=False, limit=FOLDER_LIMIT)):
            if not SAVE_FILES:
                continue

            # Make folder for email thread with same name as msg.subject
            sanitized_subject = sanitize_filename(msg.subject)
            if sanitized_subject == '':
                sanitized_subject = enumerate_file_path('No subject')
            subject_folder = make_folder_if_absent(mailbox_folder, sanitized_subject)

            # Store the message itself as JSON inside the thread folder.
            json_filename = enumerate_file_path(os.path.join(subject_folder, 'message.json'))
            encoded_message = email_to_json.json_encode(msg)
            write_to_file(json_filename, encoded_message, as_bytes=False)

            # To decode json representation of imap_tools.message.MailMessage object:
            # b = email_to_json.json_decode(a)
def dojointplot(ds,spec,freq,beamazel,optical,optazel,optlla,isrlla,heightkm,utopt,P):
    """
    Render radar data and optical frames into one figure and write it as a video.

    ds: radar data (must be a DataArray)
    optical: optical image stack indexed by frame first, or None
    utopt: optical frame times; passed to datetime.fromtimestamp
    P: parameter dict; reads 'isrfn', 'zlim', 'tlim', 'odir' and 'makeplot'

    The lower axes hold the radar plot with a moving time line, the upper axes
    the optical frame with the beam location overlaid. Frames are grabbed with
    the ffmpeg writer into <P['odir']>/joint_<first optical time>.mkv.

    NOTE(review): spec and freq are not used in this function.
    """
    assert isinstance(ds,DataArray)

#%% setup master figure
    fg = figure(figsize=(8,12))
    gs = gridspec.GridSpec(2, 1, height_ratios=[3,1])
#%% setup radar plot(s)
    a1 = fg.add_subplot(gs[1])
    plotsumionline(ds,a1,expfn(P['isrfn']),P['zlim'])

    # Vertical marker and label that are moved to the current radar time below.
    h1 = a1.axvline(nan,color='k',linestyle='--')
    t1 = a1.text(0.05,0.95,'time=',transform=a1.transAxes,va='top',ha='left')
#%% setup top optical plot
    # NOTE(review): a0/h0/t0/s0/azimg/elimg only exist when optical is given,
    # but the loop below uses them whenever iopt is not None — confirm.
    if optical is not None:
        a0 = fg.add_subplot(gs[0])
        clim = compclim(optical,lower=10,upper=99.99)
        h0 = a0.imshow(optical[0,...],origin='lower',interpolation='none',cmap='gray',
                       norm=vidnorm,vmin=clim[0],vmax=clim[1])
        a0.set_axis_off()
        t0 = a0.set_title('')

#%% plot magnetic zenith beam
        azimg = optazel[:,1].reshape(optical.shape[1:])
        elimg = optazel[:,2].reshape(optical.shape[1:])

        optisrazel = projectisrhist(isrlla,beamazel,optlla,optazel,heightkm)

        br,bc = findindex2Dsphere(azimg,elimg,optisrazel['az'],optisrazel['el'])

        #hollow beam circle
        # a2.scatter(bc,br,s=500,marker='o',facecolors='none',edgecolor='red', alpha=0.5)

        #beam data, filled circle
        s0 = a0.scatter(bc,br,s=2700,alpha=0.6,linewidths=3,
                        edgecolors=jet(linspace(ds.min().item(), ds.max().item())))

        a0.autoscale(True,tight=True)
        fg.tight_layout()
#%% time sync
    tisr = ds.time.values
    Iisr,Iopt = timesync(tisr,utopt,P['tlim'])
#%% iterate
    first = True
    Writer = anim.writers['ffmpeg']
    writer = Writer(fps=5,
                    metadata=dict(artist='Michael Hirsch'),
                    codec='ffv1')

    # NOTE(review): this timestamp is converted without tz, unlike ctopt below.
    ofn = Path(P['odir']).expanduser() / ('joint_' + pathvalidate.sanitize_filename(str(datetime.fromtimestamp(utopt[0]))[:-3]) + '.mkv')

    print('writing {}'.format(ofn))
    with writer.saving(fg, str(ofn),150):
        for iisr,iopt in zip(Iisr,Iopt):
            ctisr = tisr[iisr]
#%% update isr plot
            h1.set_xdata(ctisr)
            t1.set_text('isr: {}'.format(ctisr))
#%% update hist plot
            if iopt is not None:
                ctopt = datetime.fromtimestamp(utopt[iopt], tz=UTC)
                h0.set_data(optical[iopt,...])
                t0.set_text('optical: {}'.format(ctopt))
                s0.set_array(ds.loc[ctisr]) #FIXME circle not changing magnetic zenith beam color? NOTE this is isr time index
#%% anim
            # Show the az/el scale once, on the first frame that has optical data.
            if first and iopt is not None:
                plotazelscale(optical[iopt,...],azimg,elimg)
                show()
                first=False
            # draw(); pause(0.01)

            writer.grab_frame(facecolor='k')

            # NOTE(review): ofn is built with a '.mkv' suffix above, so this
            # branch does not look reachable — confirm.
            if ofn.suffix == '.png':
                try:
                    writeplots(fg,ctopt,ofn,P['makeplot'],ctxt='joint')
                except UnboundLocalError:
                    # ctopt is unbound until an optical frame has been seen.
                    writeplots(fg,ctisr,ofn,P['makeplot'],ctxt='isr')
def test_exception_type(self, value, expected):
    """Sanitizing ``value`` raises ``expected`` and ``value`` is not a valid file name."""
    with pytest.raises(expected):
        sanitize_filename(value)

    still_valid = is_valid_filename(value)
    assert not still_valid
def create_filename_without_extension(item):
    """Return the sanitized ``item["Title"]``; no extension is appended."""
    title = item["Title"]
    return sanitize_filename(title)
def getImage(directory, image):
    """Send the file ``image`` from ``directory`` below ``$EFFIGY_PATH``.

    :param directory: sub-directory of the ``EFFIGY_PATH`` environment variable
    :param image: requested file name; it is sanitized before use
    :return: the result of ``send_file`` for the resolved path
    """
    safe_name = sanitize_filename(image)
    root = os.environ['EFFIGY_PATH']
    return send_file(path.join(root, directory, safe_name), attachment_filename=safe_name)
def download(
    self,
    quality: str,
    file_name: str = None,
    path: str = None,
    multi_threading: bool = False,
    max_threads: int = None,
    use_ffmpeg: bool = True,
    include_intro: bool = False,
    delete_chunks: bool = True,
    on_progress=None,
    print_progress: bool = True,
):
    """Downloads the current episode in your selected quality.

    :param quality: The quality that you want to download. All the available
                    are "ld" (360p), "sd" (480p), "hd" (720p) and "fullhd".
                    Note that all qualities may not be available for all episodes.
    :type quality: str
    :param file_name: The name of the downloaded file. If left to None, the file will be named
                      "[anime_name] - [episode_number].mp4". Macros are also supported,
                      "<anititle>" will be replaced by the anime name, <ep> will be replaced
                      by episode number and <eptitle> will be replaced by the episodes title.
                      For example, lets say that the episode in question is the third episode of
                      the anime called "Vinland Saga". The title of the episode is "Troll". Suppose
                      we pass the string ``"<anititle> - <ep> - <eptitle>"``, the resulting file will be
                      named ``"Vinland Saga - 3 - Troll.mp4"``
    :type file_name: str, optional
    :param path: Path to where you want the downloaded video to be, defaults to None. If left None
                 the current working directory i.e. the directory where the script calling the method
                 lives is taken as the path.
    :type path: str, optional
    :param multi_threading: Set this to true to enable multithreaded downloading, defaults to False.
                            Enabling this can offer significant performance benefits especially on faster
                            connections. However this comes with a trade off. Using multi threading negatively
                            affects download resumability. Therefore it is recommended that this be set to False
                            when using slower connections.
    :type multi_threading: bool, optional
    :param max_threads: Set the maximum number of threads that will be used at once when using multi
                        threaded downloading, defaults to None. When None, the maximum number of feasible
                        threads will be used i.e one thread per chunk.
    :type max_threads: int, optional
    :param use_ffmpeg: Enable/disable using FFMPEG to combine the downloaded chunks, defaults to True.
                       Requires FFMPEG. It is recommended to keep this enabled as not using FFMPEG can cause
                       video playback issues on certain players. Using FFMPEG also results in noticeably smaller
                       files.
    :type use_ffmpeg: bool, optional
    :param include_intro: Set this to true to include the 5 second aniwatch intro, defaults to False.
                          It is recommended to skip the intro as this causes issues when combining the chunks
                          with FFMPEG.
    :type include_intro: bool, optional
    :param delete_chunks: Set this to False to not delete the downloaded .ts chunks after they have been
                          combined into a single mp4 file. Defaults to True
    :type delete_chunks: bool, optional
    :param on_progress: Register a function that is called every time a new chunk is downloaded. The number
                        of chunks done and the total number of chunks are passed as arguments to the function in
                        that exact order. Defaults to None.
    :type on_progress: function, optional
    :param print_progress: Print the number of chunks done and the total number of chunks to the console,
                           defaults to True.
    :type print_progress: bool, optional
    """
    m3u8 = self.get_m3u8(quality)

    if file_name is None:
        file_name = f"{self.anime_title[:128]}-{self.number}"
    else:
        # Expand the supported macros in the caller's template.
        file_name = (file_name.replace("<ep>", str(self.number)).replace(
            "<eptitle>", self.title).replace("<anititle>", self.anime_title[:128]))
    file_name = sanitize_filename(file_name)

    # The downloaders write relative to the working directory, so switch to
    # the target path for the duration of the download.
    current_path = os.getcwd()
    if path:
        os.chdir(path)
    try:
        if multi_threading:
            dlr = MultiThreadDownloader(self.__network, m3u8, file_name,
                                        self.ep_id, max_threads, use_ffmpeg,
                                        include_intro, delete_chunks,
                                        self.__generate_default_headers())
        else:
            dlr = Downloader(self.__network, m3u8, file_name, self.ep_id,
                             use_ffmpeg, include_intro, delete_chunks,
                             self.__generate_default_headers())

        dlr.download()
        dlr.merge()
        if delete_chunks:
            dlr.remove_chunks()
    finally:
        # Always restore the caller's working directory, even when the
        # download or merge raises; otherwise the process is left in `path`.
        os.chdir(current_path)
def parse(self, response):
    """Save a crawled page and its images locally (and to S3), then follow links.

    The response body is written to the path given by ``self.url_to_local_path``,
    every ``<img src>`` link is fetched into the page's image folder (re-using
    ``<cache_folder>/image_cache`` when possible), and a new request is yielded
    for every link that matches ``self.url_path``. When ``self.s3`` is set the
    page and the images are uploaded as well.

    Any exception raised while handling the page is logged through
    ``self.print_dbg`` and counted in ``self.error_count``.
    """
    self.print_dbg("Entered Parse Function")
    try:
        # Re-encode the response as UTF-8 before saving and extracting links.
        response = response.replace(encoding='utf8', body=response.text.encode('utf8'))
        self.count = self.count + 1

        local_filename, remote_filename, image_folder = self.url_to_local_path(
            response.url)
        self.print_dbg("Downloading URL: {} to {}, {}".format(
            response.url, local_filename, image_folder))

        with open(local_filename, 'wb') as f:
            f.write(response.body)

        if self.s3:
            s3file = self.s3path + remote_filename
            self.print_dbg(
                "Uploading to S3 bucket {}. s3 file= {} ".format(
                    self.s3bucket, s3file))
            self.s3.upload_file(local_filename, self.s3bucket, s3file)

        # response.replace(encoding='UTF-8')
        self.print_dbg("Extracting Image Links")
        pageImages = LinkExtractor(tags=['img'], attrs=['src'], deny_extensions=[], canonicalize=False)
        self.print_dbg("Done Extracting Image Links")
        # self.print_dbg(COLORS.OKBLUE + "Images:", pageImages.extract_links(response), COLORS.ENDC)

        for image in pageImages.extract_links(response):
            # Will download the images - it caches images and does not download same url twice
            self.print_dbg("Looping over image: " + image.url)
            image_url = image.url
            # The cache entry is named after the SHA-224 hash of the image URL.
            image_url_hash = hashlib.sha224(
                str(image_url).encode('utf-8')).hexdigest()
            # Local file name: everything after the last '/' of the URL, sanitized.
            filename = sanitize_filename(image_url[image_url.rfind("/") + 1:])
            image_path = os.path.join(image_folder, filename)
            cache_file = os.path.join(
                os.path.join(self.cache_folder, "image_cache"), image_url_hash)
            file_exists = os.path.exists(image_path)

            if os.path.exists(cache_file) and not file_exists:
                # This file has been cached, no need to redownload, just copy
                copyfile(cache_file, image_path)
            elif not file_exists:
                # do not redownload if image exists on drive
                try:
                    self.print_dbg("Downloading image:" + image_url)
                    # with eventlet.Timeout(10):
                    # image_content = requests.get(image_url, timeout=(5, 10), verify=False).content
                    r = http.request('GET', image_url, preload_content=False,
                                     timeout=urllib3.Timeout(connect=5.0, read=10.0))

                    # Stream the body to disk in 64 KiB chunks.
                    with open(image_path, 'wb') as out:
                        while True:
                            data = r.read(65536)
                            if not data:
                                break
                            out.write(data)

                    # NOTE(review): release_conn() is skipped if the read or
                    # write above raises — confirm whether that matters here.
                    r.release_conn()

                    self.print_dbg(
                        "Image download complete. Written to: " + image_path)
                    # Copy to cache
                    copyfile(image_path, cache_file)
                except Exception as e:
                    # A failed image is logged and skipped; the page continues.
                    self.print_dbg(COLORS.FAIL + "Exception: " + str(e) + COLORS.ENDC)

            if self.s3:
                # Remote name: the local path with the leading self.folder removed.
                remote_imgname = image_path[len(self.folder):]
                s3imgfile = self.s3path + remote_imgname
                self.print_dbg(
                    "Uploading image to S3 bucket: {} ".format(s3imgfile))
                self.s3.upload_file(image_path, self.s3bucket, s3imgfile)

        self.pages_parsed = self.pages_parsed + 1

        self.print_dbg("Extracting Links")
        link_extractor = LinkExtractor(allow=self.url_path + ".*")
        for link in link_extractor.extract_links(response):
            self.print_dbg(COLORS.OKGREEN + "Yielding: ->" + link.url + COLORS.ENDC)
            yield scrapy.Request(link.url, callback=self.parse, errback=self.errback_httpbin)
            # self.print_dbg("Yielded")
            # response.follow(link, self.parse)
    except Exception as e:
        self.print_dbg(COLORS.FAIL + "Exception in parse: " + str(e) + COLORS.ENDC)
        self.print_dbg((response.headers, response.body[:1024]))
        # If we have parsed a lot of pages, reduce the error count
        # from exceptions here
        # (after more than 20 parsed pages the -1 and +1 cancel out, so the
        # error is effectively not counted)
        if self.pages_parsed > 20:
            self.error_count = self.error_count - 1
        self.error_count = self.error_count + 1
def test_normal_space_or_period_at_tail(self, platform, value, expected):
    """Sanitizing ``value`` for ``platform`` gives ``expected``, valid on that platform."""
    sanitized = sanitize_filename(value, platform=platform)

    assert sanitized == expected
    assert is_valid_filename(sanitized, platform=platform)
def run_(window: sg.Window, username, progresses: List[Tuple[sg.Text, sg.ProgressBar]], stopped: Event, lists=DEFAULT_LISTS, dupli_mode=0, dir=ml.get_default_dir(), thread_count=5):
    """Download opening/ending songs for every anime in a user's lists.

    For each list type the anime list is fetched page by page; for each anime
    one YouTube search is made per opening and ending, and the first result is
    queued for download on a thread pool. Files that are already recorded in
    ``ml.files_already_downloaded`` are copied or moved instead, depending on
    ``dupli_mode``. Setting ``stopped`` aborts all loops.

    :param window: GUI window; its ``'--PROGRESS_UPDATE--'`` element is clicked
        from the progress callback
    :param username: user whose lists are processed
    :param progresses: not used in this function
    :param stopped: event checked between steps; set here when an error occurs
    :param lists: list types to process
    :param dupli_mode: 0-move, 1-copy, 2-download again; 3 skips the duplicate check
    :param dir: not used in this function (NOTE(review): the default is
        evaluated once, when the function is defined)
    :param thread_count: size of the thread pool and of the ``st`` slot table
    """
    # dupli_mode: 0-move, 1-copy, 2-download again
    def progress_callback():
        window['--PROGRESS_UPDATE--'].click()

    st.init(thread_count=thread_count)
    with ThreadPoolExecutor(max_workers=thread_count) as executor:
        try:
            for idx_type, list_type in enumerate(lists):
                if stopped.is_set():
                    break
                print(f'[I] Doing list of type "{list_type}"')
                fold = path_joins(ml.library_dir, username, list_type)
                # make anime list folder
                os.makedirs(fold, exist_ok=True)
                print('[I] Created folder of this anime list')
                page_num = 1
                print(f'[I] Doing page number {page_num}')

                # Retry the first page until it loads without a ConnectionError.
                was_con_err_list = True
                while was_con_err_list:
                    try:
                        anime_list = mm.get_anime_list_for_page(
                            username, list_type, page_num)
                        was_con_err_list = False
                    except ConnectionError as ce:
                        cprint('[E] Was connection error, retrying')
                anime_list = anime_list  # type: ignore

                # An empty page ends the list.
                while len(anime_list) > 0:
                    if stopped.is_set():
                        break
                    anime_in_page_done = 0
                    for idx_anime, anime in enumerate(anime_list):
                        if stopped.is_set():
                            break
                        # NOTE(review): the chained comparison requires
                        # PAGE_LIMIT > 0 and 0 >= PAGE_LIMIT at the same time,
                        # so this debug limit looks unreachable — confirm.
                        if PAGE_LIMIT > 0 >= PAGE_LIMIT and anime_in_page_done:
                            print(f'[I] Debug limit ({PAGE_LIMIT}) reached')
                            break
                        mal_id = anime['mal_id']
                        title, ops, eds = mm.get_cached(mal_id)
                        print(f'[I] Got page for "{title} ({mal_id})')
                        # One search phrase per opening and per ending.
                        to_search = [f'{title} op {i + 1}' for i in range(ops)] + \
                            [f'{title} ed {i + 1}' for i in range(eds)]
                        # NOTE(review): nothing is ever appended to this list,
                        # so the "already downloaded" check below never matches.
                        already_downloaded_in_anime = []
                        for idx_video, request in enumerate(to_search):
                            if stopped.is_set():
                                break
                            print(f'[I] YT request: "{request}"')
                            # Only the first search result is used.
                            search_res = VideosSearch(
                                request, limit=1).result()['result'][0]
                            response, video_title, video_id = search_res[
                                'link'], search_res['title'], search_res['id']
                            filename = f'{request} ({str(mal_id)}) - {video_id}.mp3'
                            filename = str(sanitize_filename(filename))
                            filepath = path_joins(fold, filename)

                            if dupli_mode != 3:
                                if filename in ml.files_already_downloaded:
                                    _username, _type = ml.files_already_downloaded[
                                        filename]
                                    if username != _username:
                                        # Another user's copy is always copied, never moved.
                                        cprint(
                                            f'[*] Video found downloaded in folder of different user ({path_joins(_username, _type)}), copying it',
                                            'yellow')
                                        ml.copy(username, list_type, filename)
                                        print('[I] File succesfully copied')
                                        continue
                                    elif dupli_mode == 0:
                                        cprint(
                                            f'[*] Video found downloaded in different list ({path_joins(_username, _type)}), moving it',
                                            'yellow')
                                        ml.move(username, list_type, filename)
                                        print('[I] File succesfully moved')
                                        continue
                                    elif dupli_mode == 1:
                                        cprint(
                                            f'[*] Video found downloaded in different list ({path_joins(_username, _type)}), copying it',
                                            'yellow')
                                        ml.copy(username, list_type, filename)
                                        print('[I] File succesfully copied')
                                        continue

                            if response in already_downloaded_in_anime:
                                cprint(
                                    f'[*] Video "{response}" already downloaded, skipping',
                                    'yellow')
                                continue

                            # Poll until a download slot is free or a stop is requested.
                            print('[I] Waiting for free thread')
                            while not st.can_be_added() and not stopped.is_set(
                            ):
                                sleep(0.1)
                            # print(st.threads)
                            print('[I] Found free')
                            if stopped.is_set():
                                break
                            free_index = st.find_free()
                            song_thread = st.SongDownloadThread(
                                response, filepath, request)
                            future = executor.submit(song_thread.run, progress_callback, free_index)
                            st.add_thread(song_thread, future)
                            sleep(0.001)
                            # cprint(f'[D] threads: {st.threads}')
                        anime_in_page_done += 1

                    # Fetch the next page, again retrying on connection errors.
                    page_num += 1
                    print(f'[I] Doing page number {page_num}')
                    was_con_err_list = True
                    while was_con_err_list:
                        try:
                            anime_list = mm.get_anime_list_for_page(
                                username, list_type, page_num)
                            was_con_err_list = False
                        except ConnectionError as ce:
                            cprint('[E] Was connection error, retrying')
        except Exception as e:
            # Any other error is printed and stops the whole run.
            cprint('[E] ' + str(e), 'red')
            stopped.set()

        # On stop, cancel the future stored at index 1 of every registered entry.
        if stopped.is_set():
            for song_thread in st.threads:
                if song_thread is not None:
                    song_thread[1].cancel()
async def on_member_join(member):
    """Build a welcome image for a new member and post it to the welcome channel.

    Runs only for servers whose config entry lists "Welcome" in
    ``enabled_modules``. The member's avatar and the server icon are downloaded
    into ``extras/``, pasted onto ``extras/template.png`` together with the
    member and server names, the result is sent to the configured
    ``welcome_channel`` and the temporary files are removed.
    """
    # Look up this server's config entry; do nothing if the module is disabled.
    if not "Welcome" in [
            server['enabled_modules'] for server in bot.config['servers']
            if server['id'] == member.server.id
    ][0]:
        return

    template = Image.open('extras/template.png')
    draw = ImageDraw.Draw(template)
    user = member.name
    server = member.server.name
    img_fraction = 0.50
    fontsize = 16

    # File-system safe variants of the names, used for the temporary image files.
    member_name = pathvalidate.sanitize_filename(member.name).replace(
        '(', '').replace(')', '').replace(' ', '')
    server_name = pathvalidate.sanitize_filename(member.server.name).replace(
        '(', '').replace(')', '').replace(' ', '')

    # Fall back to the default avatar when the member has not set one.
    if member.avatar_url == "":
        member_avatar = requests.get(member.default_avatar_url)
    else:
        member_avatar = requests.get(member.avatar_url)
    with open('extras/{}.png'.format(member_name), 'wb') as f:
        for chunk in member_avatar.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    member_avatar = Image.open('extras/{}.png'.format(member_name))
    member_avatar = member_avatar.resize((182, 182), Image.ANTIALIAS)
    template.paste(member_avatar, (34, 71))

    # NOTE(review): if member_name equals server_name both downloads share one
    # file name, and the second os.remove below would fail — confirm.
    guild_icon = requests.get(member.server.icon_url)
    with open('extras/{}.png'.format(server_name), 'wb') as f:
        for chunk in guild_icon.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    guild_icon = Image.open('extras/{}.png'.format(server_name))
    guild_icon = guild_icon.resize((64, 64), Image.ANTIALIAS)
    template.paste(guild_icon, (660, 300))

    # Grow the font until the user name spans img_fraction of the template
    # width, then step back one size.
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    while font.getsize(user)[0] < img_fraction * template.size[0]:
        fontsize += 1
        font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    fontsize -= 1
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    # Names shorter than 6 characters use a fixed size instead.
    if len(user) < 6:
        font = ImageFont.truetype("extras/segoeui.ttf", 58)
    draw.text((125, 290), user, (0, 0, 0), font=font)

    # Same fitting procedure for the server name, with a quarter-width target.
    fontsize = 16
    img_fraction = 0.25
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    while font.getsize(server)[0] < img_fraction * template.size[0]:
        fontsize += 1
        font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    fontsize -= 1
    font = ImageFont.truetype("extras/segoeui.ttf", fontsize)
    if len(server) < 6:
        font = ImageFont.truetype("extras/segoeui.ttf", 32)
    draw.text((540, 255), server, (0, 0, 0), font=font)

    template.save('extras/finished.png')
    await bot.send_file(
        member.server.get_channel([
            server['welcome_channel'] for server in bot.config['servers']
            if server['id'] == member.server.id
        ][0]), 'extras/finished.png')

    # Clean up the temporary files (skipped if anything above raised).
    os.remove('extras/finished.png')
    os.remove('extras/{}.png'.format(member_name))
    os.remove('extras/{}.png'.format(server_name))
# Determine results count results_count = search_data['total'] results_pages = math.ceil(results_count / 50) # Exit script if no results if (results_count == 0): print('No results found for your query, sorry') exit() # Inform user of results count print('Found ' + str(results_count) + ' images, producing ' + str(results_pages) + ' pages of results') # Create results folders spaced_name = search_string.replace(":", " ") folder_name = pathvalidate.sanitize_filename(spaced_name) downloads_folder = './downloads/' + folder_name + '/' downloads_json_folder = downloads_folder + 'json/' try: os.makedirs(downloads_folder) os.makedirs(downloads_json_folder) except FileExistsError: print("Results directory already exists.") # Loop through each page counter = 1 for page in range(0, results_pages): # Generate a search URL for this page of results page_search_url = config[ 'site'] + '/api/v1/json/search/images?key=' + config[ 'key'] + '&perpage=50&page=' + str(
def upload_subjects(
    subject_set_id,
    manifest_files,
    allow_missing,
    remote_location,
    mime_type,
    file_column,
):
    """
    Uploads subjects from each of the given MANIFEST_FILES.

    Example with only local files:

    $ panoptes subject-set upload-subjects 4667 manifest.csv

    Local filenames will be automatically detected in the manifest and
    uploaded, or filename columns can be specified with --file-column.

    If you are hosting your media yourself, you can put the URLs in the
    manifest and specify the column number(s):

    $ panoptes subject-set upload-subjects -r 1 4667 manifest.csv

    $ panoptes subject-set upload-subjects -r 1 -r 2 4667 manifest.csv

    Any local files will still be detected and uploaded.
    """
    # Developer notes (kept out of the docstring above so that text stays
    # exactly as written):
    #
    # * A single ``.yaml`` manifest is a saved upload state written by a
    #   previous failed run; it is loaded and the upload resumes from it.
    #   Anything else is read as CSV and a fresh upload state is built.
    # * ``upload_state`` tracks rows still ``waiting_to_upload`` and
    #   created subjects ``waiting_to_link`` to the subject set.
    # * Returns -1 on validation errors and None otherwise. If subjects
    #   are still pending or unlinked when the upload loop exits, the user
    #   is offered the chance to dump ``upload_state`` to a YAML file.

    if (
        len(manifest_files) > 1
        and any(map(lambda m: m.endswith('.yaml'), manifest_files))
    ):
        click.echo(
            'Error: YAML manifests must be processed one at a time.',
            err=True,
        )
        return -1
    elif manifest_files[0].endswith('.yaml'):
        with open(manifest_files[0], 'r') as yaml_manifest:
            # NOTE(review): FullLoader can construct more than plain data
            # types. It is kept so state files written by the yaml.dump
            # call at the end of this function load unchanged; only resume
            # from state files you trust.
            upload_state = yaml.load(yaml_manifest, Loader=yaml.FullLoader)

        if upload_state['state_version'] > CURRENT_STATE_VERSION:
            click.echo(
                'Error: {} was generated by a newer version of the Panoptes '
                'CLI and is not compatible with this version.'.format(
                    manifest_files[0],
                ),
                err=True,
            )
            return -1
        if upload_state['subject_set_id'] != subject_set_id:
            click.echo(
                'Warning: You specified subject set {} but this YAML '
                'manifest is for subject set {}.'.format(
                    subject_set_id,
                    upload_state['subject_set_id'],
                ),
                err=True,
            )
            click.confirm('Upload {} to subject set {} ({})?'.format(
                manifest_files[0],
                subject_set_id,
                SubjectSet.find(subject_set_id).display_name,
            ), abort=True)
            upload_state['subject_set_id'] = subject_set_id
        resumed_upload = True
    else:
        upload_state = {
            'state_version': CURRENT_STATE_VERSION,
            'subject_set_id': subject_set_id,
            'manifest_files': manifest_files,
            'allow_missing': allow_missing,
            'remote_location': remote_location,
            'mime_type': mime_type,
            'file_column': file_column,
            'waiting_to_upload': [],
            'waiting_to_link': {},
        }
        resumed_upload = False

    # A single MIME type is repeated for every remote location; otherwise
    # the two option lists must pair up one to one.
    remote_location_count = len(upload_state['remote_location'])
    mime_type_count = len(upload_state['mime_type'])
    if remote_location_count > 1 and mime_type_count == 1:
        upload_state['mime_type'] = (
            upload_state['mime_type'] * remote_location_count
        )
    elif remote_location_count > 0 and mime_type_count != remote_location_count:
        click.echo(
            'Error: The number of MIME types given must be either 1 or equal '
            'to the number of remote locations.',
            err=True,
        )
        return -1

    def validate_file(file_path):
        """Report and reject files that are missing, empty or too large."""
        if not os.path.isfile(file_path):
            click.echo(
                'Error: File "{}" could not be found.'.format(file_path, ),
                err=True,
            )
            return False

        file_size = os.path.getsize(file_path)
        if file_size == 0:
            click.echo(
                'Error: File "{}" is empty.'.format(file_path, ),
                err=True,
            )
            return False
        elif file_size > MAX_UPLOAD_FILE_SIZE:
            click.echo(
                'Error: File "{}" is {}, larger than the maximum {}.'.format(
                    file_path,
                    humanize.naturalsize(file_size),
                    humanize.naturalsize(MAX_UPLOAD_FILE_SIZE),
                ),
                err=True,
            )
            return False
        return True

    def get_index_fields(headers):
        """Join the names of headers prefixed with '%' into one string."""
        index_fields = [
            header.lstrip('%') for header in headers if header.startswith('%')
        ]
        return ",".join(str(field) for field in index_fields)

    subject_set = SubjectSet.find(upload_state['subject_set_id'])

    if not resumed_upload:
        subject_rows = []
        for manifest_file in upload_state['manifest_files']:
            # Plain 'r' instead of the legacy 'U' flag, which Python 3.11+
            # rejects; text mode already translates newlines on Python 3.
            with open(manifest_file, 'r') as manifest_f:
                file_root = os.path.dirname(manifest_file)
                r = csv.reader(manifest_f, skipinitialspace=True)
                headers = next(r)

                # update set metadata for indexed sets
                index_fields = get_index_fields(headers)
                if index_fields:
                    subject_set.metadata['indexFields'] = index_fields
                    subject_set.save()

                # remove leading % from subject metadata headings
                cleaned_headers = [header.lstrip('%') for header in headers]

                for row in r:
                    metadata = dict(zip(cleaned_headers, row))
                    files = []
                    if not upload_state['file_column']:
                        # No file columns known yet: every cell that names
                        # an existing path marks its column as a file
                        # column for this and all following rows.
                        upload_state['file_column'] = []
                        for field_number, col in enumerate(row, start=1):
                            file_path = os.path.join(file_root, col)
                            if os.path.exists(file_path):
                                upload_state['file_column'].append(
                                    field_number,
                                )
                                if not validate_file(file_path):
                                    return -1
                                files.append(file_path)
                    else:
                        for field_number in upload_state['file_column']:
                            file_path = os.path.join(
                                file_root,
                                row[field_number - 1],
                            )
                            if not validate_file(file_path):
                                return -1
                            files.append(file_path)

                    # Remote media are recorded as {mime_type: url}.
                    for field_number, _mime_type in zip(
                        upload_state['remote_location'],
                        upload_state['mime_type'],
                    ):
                        files.append({_mime_type: row[field_number - 1]})

                    if len(files) == 0:
                        click.echo(
                            'Could not find any files in row:',
                            err=True,
                        )
                        click.echo(','.join(row), err=True)
                        if not upload_state['allow_missing']:
                            return -1
                        else:
                            continue
                    subject_rows.append((files, metadata))

            if not subject_rows:
                click.echo(
                    'File {} did not contain any rows.'.format(
                        manifest_file,
                    ),
                    err=True,
                )
                return -1

        subject_rows = list(enumerate(subject_rows))
        upload_state['waiting_to_upload'] = copy.deepcopy(subject_rows)
    else:
        # Subjects that can no longer be found are queued for re-upload.
        # Iterate over a snapshot: deleting from the dict while iterating
        # its live items() view raises RuntimeError.
        for subject_id, subject_row in list(
            upload_state['waiting_to_link'].items()
        ):
            try:
                Subject.find(subject_id)
            except PanoptesAPIException:
                upload_state['waiting_to_upload'].append(subject_row)
                del upload_state['waiting_to_link'][subject_id]
        subject_rows = copy.deepcopy(upload_state['waiting_to_upload'])

    pending_subjects = []

    def move_created(limit):
        """Wait until at most ``limit`` subjects are still being saved."""
        while len(pending_subjects) > limit:
            # Iterate over a copy so removing finished entries does not
            # make the loop skip the element after each removal.
            for subject, subject_row in list(pending_subjects):
                if subject.async_save_result:
                    pending_subjects.remove((subject, subject_row))
                    upload_state['waiting_to_upload'].remove(subject_row)
                    upload_state['waiting_to_link'][subject.id] = subject_row
            time.sleep(0.5)

    def link_subjects(limit):
        """Link all created subjects once more than ``limit`` are waiting."""
        if len(upload_state['waiting_to_link']) > limit:
            subject_set.add(list(upload_state['waiting_to_link'].keys()))
            upload_state['waiting_to_link'].clear()

    with click.progressbar(
        subject_rows,
        length=len(subject_rows),
        label='Uploading subjects',
    ) as _subject_rows:
        try:
            with Subject.async_saves():
                for subject_row in _subject_rows:
                    count, (files, metadata) = subject_row
                    subject = Subject()
                    subject.links.project = subject_set.links.project
                    for media_file in files:
                        subject.add_location(media_file)
                    subject.metadata.update(metadata)
                    subject.save()

                    pending_subjects.append((subject, subject_row))

                    move_created(MAX_PENDING_SUBJECTS)
                    link_subjects(LINK_BATCH_SIZE)

                # Drain whatever is left once every row was submitted.
                move_created(0)
                link_subjects(0)
        finally:
            # Anything still pending or unlinked here means the loop above
            # did not complete; offer to persist the state for a resume.
            if (
                len(pending_subjects) > 0
                or len(upload_state['waiting_to_link']) > 0
            ):
                click.echo('Error: Upload failed.', err=True)
                if click.confirm(
                    'Would you like to save the upload state to resume the '
                    'upload later?',
                    default=True,
                ):
                    while True:
                        state_file_name = 'panoptes-upload-{}.yaml'.format(
                            subject_set_id,
                        )
                        state_file_name = click.prompt(
                            'Enter filename to save to',
                            default=state_file_name,
                        )

                        if not state_file_name.endswith('.yaml'):
                            click.echo(
                                'Error: File name must end in ".yaml".',
                                err=True,
                            )
                            if click.confirm(
                                'Save to {}.yaml?'.format(state_file_name),
                                default=True,
                            ):
                                state_file_name += '.yaml'
                            else:
                                continue
                        if not is_valid_filename(state_file_name):
                            click.echo(
                                'Error: {} is not a valid file name'.format(
                                    state_file_name,
                                ),
                                err=True,
                            )
                            sanitized_filename = sanitize_filename(
                                state_file_name,
                            )
                            if click.confirm(
                                'Save to {}?'.format(sanitized_filename, ),
                                default=True,
                            ):
                                state_file_name = sanitized_filename
                            else:
                                continue
                        if os.path.exists(state_file_name):
                            if not click.confirm(
                                'File {} already exists. Overwrite?'.
                                format(state_file_name, ),
                                default=False,
                            ):
                                continue
                        break

                    with open(state_file_name, 'w') as state_file:
                        yaml.dump(upload_state, state_file)
def get_pkg_cache_filepath(self, package_name, filename):
    """Return the path of ``filename`` inside the cache dir of a package.

    The directory comes from ``__get_pkg_cache_dir(package_name)`` and the
    file name is passed through ``sanitize_filename`` before being joined.
    """
    package_dir = self.__get_pkg_cache_dir(package_name)
    safe_name = sanitize_filename(filename)
    return package_dir.joinpath(safe_name)
def get_name_from_content_disposition(fname, ContentDisposition):
    """Build a safe file name from ``fname`` and a Content-Disposition value.

    Only the extension of the header's quoted ``filename="..."`` parameter
    is used; it is appended to ``fname`` and the result is sanitized.

    :param fname: base name (without extension) for the resulting file.
    :param ContentDisposition: raw Content-Disposition header value.
    :return: the sanitized ``fname`` plus extension, or ``None`` when the
        header carries no quoted ``filename`` parameter.
    """
    # Stop at the closing quote: a greedy ``.+`` would run on to the last
    # quote in the header when further quoted parameters follow.
    match = re.search(r'filename="([^"]+)"', ContentDisposition)
    if match is None:
        return None
    _, extension = os.path.splitext(match.group(1))
    return sanitize_filename(fname + extension)
def get_misc_cache_filepath(self, classifier_name, filename):
    """Return the path of ``filename`` inside a misc cache directory.

    The directory comes from ``__get_misc_cache_dir(classifier_name)`` and
    the file name is passed through ``sanitize_filename`` before joining.
    """
    misc_dir = self.__get_misc_cache_dir(classifier_name)
    safe_name = sanitize_filename(filename)
    return misc_dir.joinpath(safe_name)
def download_and_tag(
    root_dir,
    tmp_count,
    track_url_dict,
    track_metadata,
    album_or_track_metadata,
    is_track,
    is_mp3,
    embed_art=False,
    multiple=None,
):
    """
    Fetch one track into a temporary file and hand it to the tagger.

    :param str root_dir: Directory the track is stored in
    :param int tmp_count: Number used to name the temporary download file
    :param dict track_url_dict: Dictionary expected to hold the "url" key
    :param dict track_metadata: Track dictionary ("title", "track_number")
    :param dict album_or_track_metadata: Album/track dictionary passed
        through to the tagging function
    :param bool is_track: Passed through to the tagging function
    :param bool is_mp3: Selects the ".mp3" extension and MP3 tagger;
        otherwise ".flac" and the FLAC tagger are used
    :param bool embed_art: Passed through to the tagging function
    :param multiple: Disc number; when truthy a "Disc N" subfolder is used
    :type multiple: integer or None
    """
    try:
        download_url = track_url_dict["url"]
    except KeyError:
        print("Track not available for download")
        return

    if is_mp3:
        extension = ".mp3"
        tag_function = metadata.tag_mp3
    else:
        extension = ".flac"
        tag_function = metadata.tag_flac

    if multiple:
        root_dir = os.path.join(root_dir, "Disc " + str(multiple))
        os.makedirs(root_dir, exist_ok=True)

    # Hidden temporary name, e.g. ".03.flac", used while downloading.
    tmp_name = ".{:02}".format(tmp_count) + extension
    filename = os.path.join(root_dir, tmp_name)

    safe_title = sanitize_filename(track_metadata["title"])
    final_name = "{:02}. {}{}".format(
        track_metadata["track_number"], safe_title, extension
    )
    final_file = os.path.join(root_dir, final_name)

    if os.path.isfile(final_file):
        print(track_metadata["title"] + " was already downloaded. Skipping...")
        return

    progress_label = get_description(track_url_dict, track_metadata, multiple)
    tqdm_download(download_url, filename, progress_label)

    try:
        tag_function(
            filename,
            root_dir,
            final_file,
            track_metadata,
            album_or_track_metadata,
            is_track,
            embed_art,
        )
    except Exception as e:
        # Tagging failed: report it and discard the temporary download.
        print("Error tagging the file: " + str(e))
        os.remove(filename)
def __get_pkg_cache_dir(self, package_name):
    """Return the cache directory for ``package_name``, creating it.

    The directory name is the sanitized, lower-cased package name placed
    under ``self.__base_dir``; ``makedirs_p()`` is called on it before it
    is returned.
    """
    dir_name = sanitize_filename(package_name).lower()
    package_dir = self.__base_dir.joinpath(dir_name)
    package_dir.makedirs_p()
    return package_dir
def __processOnlineDocuments(self, isCountRun=False):
    """Classify every online document and download the ones that qualify.

    Each entry of ``self.onlineDocumentsDict`` is first recorded in the
    matching index lists (advertisement, archived, filename match, unread)
    and then filtered by the settings ``downloadOnlyFromOnlineArchive``
    and ``downloadOnlyFilenames``. Documents whose target file already
    exists are skipped. The remaining ones are downloaded unless this is a
    count run or the ``dryRun`` setting is on.

    :param isCountRun: when true, only the index lists are filled; nothing
        is printed and nothing is downloaded.
    """
    if not self.onlineDocumentsDict:
        return

    statusWidth = 200
    summaryWidth = 74

    def __printStatus(idx, document, status=""):
        # fill idx to 5 chars
        idx = str(idx).zfill(5)
        if not isCountRun:
            self.__printLeftRight(
                idx + " - " + document["dateCreation"] + " - " +
                document["name"] + " - " + document["mimeType"],
                status,
                ".",
                statusWidth,
            )

    def __isTrue(value):
        # The metadata flags are compared by their text form.
        return str(value).lower() == "true"

    overwrite = False  # Only download new files
    useSubFolders = self.settings.getBoolValueForKey("useSubFolders")
    outputDir = self.settings.getValueForKey("outputDir")
    isDownloadOnlyFilename = self.settings.getBoolValueForKey(
        "downloadOnlyFilenames")
    downloadFilenameList = self.settings.getValueForKey(
        "downloadOnlyFilenamesArray")
    downloadOnlyFromOnlineArchive = self.settings.getBoolValueForKey(
        "downloadOnlyFromOnlineArchive")
    # Loop-invariant: read the dry-run flag once instead of per document.
    isRealDownload = (
        not bool(self.settings.getBoolValueForKey("dryRun"))
        and not isCountRun)

    countAll = len(self.onlineDocumentsDict)
    countProcessed = 0
    countSkipped = 0
    countDownloaded = 0

    for idx in self.onlineDocumentsDict:
        documentMeta = self.onlineDocumentsDict[idx]
        docName = documentMeta["name"]
        # Text before the first space; used for filtering and as the
        # subfolder name of PDF documents.
        firstFilename = docName.split(" ", 1)[0]
        docMimeType = documentMeta["mimeType"]
        docCreateDate = documentMeta["dateCreation"]
        isDocAdvertisement = __isTrue(documentMeta["advertisement"])
        isDocArchived = __isTrue(
            documentMeta["documentMetaData"]["archived"])
        isAlreadyRead = __isTrue(
            documentMeta["documentMetaData"]["alreadyRead"])

        subFolder = ""
        myOutputDir = outputDir
        countProcessed += 1

        # counting
        if isDocAdvertisement:
            self.onlineAdvertismentIndicesList.append(idx)
        if isDocArchived:
            self.onlineArchivedIndicesList.append(idx)
        if firstFilename in downloadFilenameList:
            self.onlineFileNameMatchingIndicesList.append(idx)
        if not isAlreadyRead:
            self.onlineUnreadIndicesList.append(idx)

        # check for setting "only download from online archive"
        if downloadOnlyFromOnlineArchive and not isDocArchived:
            __printStatus(idx, documentMeta, "SKIPPED - not in archive")
            countSkipped += 1
            continue

        # check for setting "only download if filename is in filename list"
        if (isDownloadOnlyFilename
                and firstFilename not in downloadFilenameList):
            __printStatus(idx, documentMeta,
                          "SKIPPED - filename not in filename list")
            countSkipped += 1
            continue

        if docMimeType == "application/pdf":
            subFolder = firstFilename
            docName += ".pdf"
        elif docMimeType == "text/html":
            docName += ".html"
            subFolder = "html"

        if useSubFolders:
            myOutputDir = os.path.join(outputDir, subFolder)
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(myOutputDir, exist_ok=True)

        filepath = os.path.join(myOutputDir, sanitize_filename(docName))

        # check if already downloaded
        if os.path.exists(filepath):
            self.onlineAlreadyDownloadedIndicesList.append(idx)
            if not overwrite:
                __printStatus(idx, documentMeta, "SKIPPED - no overwrite")
                countSkipped += 1
                continue
        else:
            self.onlineNotYetDownloadedIndicesList.append(idx)

        # do the download
        if isRealDownload:
            docContent = self.conn.downloadMessage(documentMeta)
            moddate = time.mktime(
                datetime.datetime.strptime(docCreateDate,
                                           "%Y-%m-%d").timetuple())
            with open(filepath, "wb") as f:
                f.write(docContent)
            # Stamp the file with the document's creation date.
            os.utime(filepath, (moddate, moddate))
            __printStatus(idx, documentMeta, "DOWNLOADED")
        else:
            __printStatus(
                idx, documentMeta,
                "DOWNLOADED - dry run, so not really downloaded")
        countDownloaded += 1

    # last line, summary status:
    if not isCountRun:
        self.__printFullWidth("--", "center", "-", summaryWidth)
        self.__printFullWidth("Status Files Downloading", "left", "-",
                              summaryWidth)
        print("All:        " + str(countAll) + " files")
        print("Processed:  " + str(countProcessed) + " files")
        print("Downloaded: " + str(countDownloaded) + " files")
        print("Skipped:    " + str(countSkipped) + " files")
path = f"{output}/" \ f"{sanitize_filename(course.name)}/" \ f"{sanitize_filename(module.name)}/" if not os.path.exists(path): os.makedirs(path) item_type = item.type print(f"{course.name} - " f"{module.name} - " f"{item.title} ({item_type})") if item_type == "File": file = canvas.get_file(item.content_id) files_downloaded.add(item.content_id) file.download(path + sanitize_filename(file.filename)) elif item_type == "Page": page = course.get_page(item.page_url) with open(path + sanitize_filename(item.title) + ".html", "w", encoding="utf-8") as f: f.write(page.body or "") files = extract_files(page.body or "") for file_id in files: if file_id in files_downloaded: continue try: file = course.get_file(file_id) files_downloaded.add(file_id) file.download(path + sanitize_filename(file.filename)) except ResourceDoesNotExist: pass elif item_type == "ExternalUrl":
def create_filename(item):
    """Build a file name from an item's title and its URL's extension.

    :param item: mapping with a "Title" and a "Url" entry.
    :return: the sanitized title, a dot, and the text after the last dot
        of the URL once its query string and fragment are removed.
    """
    # Drop "?query" and "#fragment" before looking for the extension, so
    # a dot inside either of them is not mistaken for the extension dot.
    url_without_query = item["Url"].split("#", 1)[0].split("?", 1)[0]
    extension = url_without_query.split(".")[-1]
    return sanitize_filename(item["Title"]) + "." + extension
def test_dict_convert(self):
    """Upload a dictionary blob, convert it and compare the result.

    Steps: sign up and log in, create a root language, upload
    ``dictionary/test_dict_convert.sqlite`` as a blob and check that the
    stored content has the same MD5, start the conversion, poll until the
    new dictionary appears and reports "Converting 100%", then compare the
    contents of its first perspective with the reference JSON file.
    """
    import hashlib
    import json
    from time import sleep

    real_filename = 'dictionary/test_dict_convert.sqlite'
    user_id = self.signup_common()
    self.login_common()
    root_ids = self.create_language('Корень')

    # Close the source file deterministically after hashing it.
    with open(real_filename, 'rb') as source_file:
        first_hash = hashlib.md5(source_file.read()).hexdigest()

    response = self.app.post('/blob', params={'data_type': 'dialeqt_dictionary'},
                             upload_files=([('blob', real_filename)]))
    self.assertEqual(response.status_int, HTTPOk.code)
    blob_ids = response.json

    response = self.app.get('/blobs/%s/%s' % (blob_ids['client_id'],
                                              blob_ids['object_id']))
    self.assertEqual(response.status_int, HTTPOk.code)
    file_response = self.app.get(response.json['content'])
    second_hash = hashlib.md5(file_response.body).hexdigest()
    self.assertEqual(first_hash, second_hash)

    response = self.app.post_json('/convert_check', params={'blob_client_id': blob_ids['client_id'],
                                                            'blob_object_id': blob_ids['object_id']})
    self.assertEqual(response.status_int, HTTPOk.code)
    self.assertEqual(response.json, [])

    response = self.app.post_json('/convert', params={'blob_client_id': blob_ids['client_id'],
                                                      'blob_object_id': blob_ids['object_id'],
                                                      'parent_client_id': root_ids['client_id'],
                                                      'parent_object_id': root_ids['object_id']})
    self.assertEqual(response.status_int, HTTPOk.code)
    self.assertDictEqual(response.json, {"status": "Your dictionary is being converted."
                                                   " Wait 5-15 minutes and you will see new dictionary in your dashboard."})

    # Give the conversion up to three polls to create the dictionary.
    not_found = True
    for i in range(3):
        response = self.app.post_json('/dictionaries', params={'user_created': [user_id]})
        if response.json['dictionaries']:
            not_found = False
            break
        sleep(10)
    if not_found:
        self.fail('dictionary was not found')
    dict_ids = response.json['dictionaries'][0]

    # Poll the conversion state once a minute, at most twenty times.
    for i in range(20):
        response = self.app.get('/dictionary/%s/%s/state' % (dict_ids['client_id'], dict_ids['object_id']))
        if response.json['status'].lower() == 'Converting 100%'.lower():
            break
        sleep(60)

    response = self.app.get('/dictionary/%s/%s/perspectives' % (dict_ids['client_id'], dict_ids['object_id']))
    self.assertEqual(response.status_int, HTTPOk.code)
    persp_ids = response.json['perspectives'][0]

    response = self.app.get('/dictionary/%s/%s/perspective/%s/%s/all' % (dict_ids['client_id'],
                                                                         dict_ids['object_id'],
                                                                         persp_ids['client_id'],
                                                                         persp_ids['object_id']))

    # NOTE(review): the original marker here read "Uncomment to create
    # json", yet the write below is active, so the reference file is
    # regenerated from the very response it is then compared with.
    # Confirm whether this write should be disabled.
    with open('dictionary/test_dict_convert.json', 'w') as json_file:
        json_file.write(json.dumps(response.json))

    with open('dictionary/test_dict_convert.json', 'r') as json_file:
        correct_answer = json.loads(json_file.read())
    self.assertDictEqual(response.json, correct_answer, set_like=True)
def parse_title_author(soup):
    """Split the ``#content`` heading into two sanitized parts.

    The ``<h1>`` text is split on the " \\xa0 :: \\xa0 " separator, which
    must yield exactly two parts; both are returned, in heading order,
    after ``sanitize_filename``.
    """
    heading_text = soup.select_one("#content").h1.text
    author, title = heading_text.split(' \xa0 :: \xa0 ')
    safe_author = sanitize_filename(author)
    safe_title = sanitize_filename(title)
    return safe_author, safe_title
def test_normal_max_len(self, value, max_len, expected):
    """Sanitizing with ``max_len`` yields a valid name of ``expected`` length."""
    sanitized = sanitize_filename(value, max_len=max_len)
    actual_length = len(sanitized)

    assert actual_length == expected
    assert is_valid_filename(sanitized, max_len=max_len)
def test_normal_str(self, platform, value, replace_text, expected):
    """Sanitizing ``value`` gives ``expected``, a valid ``str`` file name."""
    result = sanitize_filename(
        value,
        platform=platform,
        replacement_text=replace_text,
    )

    assert result == expected
    assert isinstance(result, str)

    # The sanitized name must pass both validation entry points.
    validate_filename(result, platform=platform)
    assert is_valid_filename(result, platform=platform)
def __createFoldersFromList(self, folders, baseFolder=''):
    """Create every folder of ``folders`` below ``self.path``.

    :param folders: iterable of folder names (or relative paths) to create.
    :param baseFolder: optional intermediate folder; it is passed through
        ``sanitize_filename`` before being joined into the path.
    """
    baseFolder = sanitize_filename(baseFolder)
    for folder in folders:
        folderName = os.path.join(self.path, baseFolder, folder)
        # The second positional parameter of os.makedirs is ``mode``, so
        # the flag has to be passed by keyword to mean "exist_ok".
        os.makedirs(folderName, exist_ok=True)
async def synthetic_ajax(request,
                         url,
                         parameters=None,
                         key=None,
                         sort_key=None,
                         default=None):
    '''
    Stub similar to google_ajax, but grabbing data from local files.

    This is helpful for testing, but it's even more helpful since
    Google is an amazingly unreliable B2B company, and this lets us
    develop without relying on them.

    At some point, we'll want to upgrade this to support small-scale
    deployments, with a directory tree such as e.g.:

    `course_rosters/[course_id].json`
    and
    `course_lists/[teacher_id].json`

    Which files are read depends on the `roster-data` / `source` setting:

    - `test`: the fixed `courses.json` and `students.json` data files.
    - `filesystem`: `course_lists/courselist-<user_id>.json` and
      `course_rosters/courseroster-<courseid>.json` (or `default.json`
      when `parameters` has no `courseid`); both ids are sanitized.
    - anything else: an error is printed and the process exits.

    `key`, `sort_key` and `default` are accepted but not used here.

    Returns the parsed JSON for `url`. A missing file is reported as an
    `aiohttp.web.HTTPInternalServerError`.
    '''
    if settings.settings['roster-data']['source'] == 'test':
        synthetic_data = {
            COURSE_URL: paths.data("courses.json"),
            ROSTER_URL: paths.data("students.json")
        }
    elif settings.settings['roster-data']['source'] == 'filesystem':
        print(request['user'])
        safe_userid = pathvalidate.sanitize_filename(
            request['user']['user_id'])
        courselist_file = "courselist-" + safe_userid
        if parameters is not None and 'courseid' in parameters:
            safe_courseid = pathvalidate.sanitize_filename(
                str(parameters['courseid']))
            roster_file = "courseroster-" + safe_courseid
        else:
            roster_file = "default"
        synthetic_data = {
            ROSTER_URL:
            paths.data("course_rosters/{roster_file}.json".format(
                roster_file=roster_file)),
            COURSE_URL:
            paths.data("course_lists/{courselist_file}.json".format(
                courselist_file=courselist_file))
        }
    else:
        print("PANIC!!! ROSTER!")
        print(settings.settings['roster-data']['source'])
        sys.exit(-1)

    try:
        # Close the file as soon as it is parsed instead of leaving the
        # handle to the garbage collector.
        with open(synthetic_data[url]) as data_file:
            data = json.load(data_file)
    except FileNotFoundError as e:
        print(e)
        raise aiohttp.web.HTTPInternalServerError(
            text="Server configuration error. "
            "No course roster file for your account. "
            "Please ask the sysadmin to make one. "
            "(And yes, they'll want to know about this issue;"
            "you won't be bugging them)")
    return data
def test_normal_reserved_name(self, value, test_platform, expected):
    """Sanitizing ``value`` for ``test_platform`` gives a valid ``expected``."""
    sanitized = sanitize_filename(value, platform=test_platform)

    assert sanitized == expected
    assert is_valid_filename(sanitized, platform=test_platform)
parser.add_argument("--end_page", default=2, type=int, help="номер страницы, ДО которой закончить скачивание") parser.add_argument("--skip_imgs", action='store_true', help="не скачивать картинки") parser.add_argument("--skip_txt", action='store_true', help="не скачивать книги") parser.add_argument("--dest_folder", default=os.path.abspath(os.curdir), help="путь к каталогу с результатами парсинга: картинкам, книгами, JSON") parser.add_argument("--json_path", default='books.json', help="указать свой путь к *.json файлу с результатами") args = parser.parse_args() img_folder = os.path.join(args.dest_folder, img_subfolder) txt_folder = os.path.join(args.dest_folder, txt_subfolder) if args.json_path: json_file = sanitize_filename(args.json_path) pathlib.Path(txt_folder).mkdir(parents=True, exist_ok=True) pathlib.Path(img_folder).mkdir(parents=True, exist_ok=True) books = [] book_urls = get_book_url_from_pages(base_url, category, args.start_page, args.end_page) logger.info(f'Подготовлено {len(book_urls)} ссылок') for page_url in book_urls: local_timeout = timeout while local_timeout < 100: try: page = get_page(page_url) if not page: logger.info(f'{page_url} страницы с книгой нет на сайте')
def test_normal_str(self, platform, value, replace_text, expected):
    """Sanitizing ``value`` gives ``expected``, a valid text-type name."""
    result = sanitize_filename(
        value,
        platform=platform,
        replacement_text=replace_text,
    )

    assert result == expected
    assert isinstance(result, six.text_type)

    # The sanitized name must pass both validation entry points.
    validate_filename(result, platform=platform)
    assert is_valid_filename(result, platform=platform)