def run():
    """Migrate image files (and gallery folders) to slugified storage paths.

    Iterates every object of every model in the module-level ``models`` list,
    moves each image referenced by the fields in ``image_fields`` to a
    directory looked up in ``models_name_dir_map``, and saves the object with
    the new path.  ``site.storage`` is the filemanager storage backend.
    NOTE(review): ``models``, ``image_fields``, ``models_name_dir_map`` and
    ``site`` are defined elsewhere in the module — confirm before reuse.
    """
    for model in models:
        for obj in model.objects.all():
            for field in image_fields:
                image = getattr(obj, field, None)
                # handle plain image fields
                if field != 'gallery' and image:
                    path = models_name_dir_map[obj.__class__.__name__]
                    new_path = os.path.join(
                        path, slugify.slugify_filename(image.filename)
                    )
                    try:
                        site.storage.move(image.path, new_path)
                        image.path = new_path
                        obj.save()
                    except IOError:
                        # source file does not exist
                        print "can't find this file - %s" % image.path
                        continue
                    except OSError, e:
                        # destination directory does not exist yet: create it
                        # and retry the move once
                        site.storage.makedirs(path)
                        print '--- create dir - %s' % path
                        site.storage.move(image.path, new_path)
                        image.path = new_path
                        obj.save()
                # handle gallery folders
                elif image and image.is_folder:
                    # for galleries
                    path = models_name_dir_map['Gallery'] % {
                        'model': obj.__class__.__name__.lower(),
                        'name': slugify.slugify_filename(obj.name.lower())
                    }
                    gallery = FileListing(image.path)
                    for image_name in gallery.listing():
                        try:
                            new_path = os.path.join(
                                path, slugify.slugify_filename(image_name)
                            )
                            site.storage.move(
                                os.path.join(image.path, image_name), new_path
                            )
                        except IOError, e:
                            # source file does not exist
                            print "can't find this file - %s" % image_name
                            continue
                        except OSError, e:
                            # destination directory does not exist yet: create
                            # it and retry the move once
                            site.storage.makedirs(path)
                            print '--- create dir - %s' % path
                            site.storage.move(
                                os.path.join(image.path, image_name), new_path
                            )
                    # save new gallery path
                    print 'save new gallery path'
                    image.path = path
                    obj.save()
def add_oil_object(session, file_columns, row_data):
    """Build an ImportedRecord from one spreadsheet row and persist it.

    Column headers are slugified/lowercased to match the model attribute
    names, the raw values are cleaned up in place, and the record plus all
    of its child collections are added to the session and committed.
    """
    normalized = [slugify_filename(col).lower() for col in file_columns]
    record = dict(zip(normalized, row_data))

    # In-place clean-up of raw row values before constructing the model.
    for fixer in (fix_name, fix_location, fix_field_name, add_record_date,
                  fix_pour_point, fix_flash_point, fix_preferred_oils):
        fixer(record)

    oil = ImportedRecord(**record)

    add_synonyms(session, oil, record)
    # Attach every child-table collection derived from the same row.
    for attach in (add_densities, add_kinematic_viscosities,
                   add_dynamic_viscosities, add_distillation_cuts,
                   add_toxicity_effective_concentrations,
                   add_toxicity_lethal_concentrations):
        attach(oil, record)

    session.add(oil)
    transaction.commit()
def url_to_filename(url):
    """
    url is a string, e.g. 'https://law.stanford.edu/events/hello-world'
    Returns string representing path: law.stanford.edu/events_hello-world
    """
    parts = urlparse(url)
    # Everything after the host is concatenated, then slugified into one
    # filesystem-safe component.
    tail = ''.join([parts.path, parts.params, parts.query, parts.fragment])
    return join(parts.netloc, slugify_filename(tail))
def _gen_csv_filename(payload1, payload2):
    """Generate a pretty CSV dump filename from two query payloads.

    Each payload contributes '<index> <querystring>'; an index is dropped
    when its payload has no query string, and empty pieces are skipped.
    """
    pieces = []
    for payload in (payload1, payload2):
        qstring = _find_qstring(payload)
        # Keep the index only when the payload actually has a query string.
        pieces.append(payload['index'] if qstring else "")
        pieces.append(qstring)
    stem = " ".join(piece for piece in pieces if piece)
    return slugify_filename(stem) + '.csv'
def _gen_csv_filename(payload1, payload2):
    """Generate a pretty CSV dump filename from two query payloads.

    The name is the slugified, space-joined '<index1> <q1> <index2> <q2>'
    with empty parts dropped; an index is blanked when its payload has no
    query string.
    """
    q1, q2 = _find_qstring(payload1), _find_qstring(payload2)
    index1 = payload1['index'] if q1 else ""
    index2 = payload2['index'] if q2 else ""
    parts = [p for p in (index1, q1, index2, q2) if p]
    return "{}.csv".format(slugify_filename(" ".join(parts)))
def recategorize_oil(session, file_columns, row_data):
    """Move one oil between categories as described by a spreadsheet row.

    The row must carry 'adios_oil_id', 'oil_name', 'remove_from' and
    'add_to' columns; a lookup failure is logged and the row is skipped.
    """
    columns = [slugify_filename(c).lower() for c in file_columns]
    row = dict(zip(columns, row_data))
    adios_id = row['adios_oil_id']

    try:
        oil_obj = session.query(Oil).filter(
            Oil.adios_oil_id == adios_id).one()
    except Exception:
        # Missing or ambiguous record: log and skip this row.
        logger.error('Re-categorize: could not query oil {}({})'.format(
            row['oil_name'], adios_id))
        return

    logger.info('Re-categorizing oil: {}'.format(oil_obj.name))
    remove_from_categories(session, oil_obj, row['remove_from'])
    add_to_categories(session, oil_obj, row['add_to'])
def get_archive_path(directory, name, format, source=None):
    """Build the path of a timestamped USPTO archive file.

    :param directory: target directory for the archive
    :param name: raw document name (sanitized and slugified)
    :param format: payload format, lowercased into the filename
    :param source: optional source tag, lowercased into a '-source' infix
    :return: '<directory>/uspto[-<source>]_<timestamp>_<name>.<format>.zip'
    """
    # Normalize the optional source into a '-source' infix.  An empty string
    # keeps the format() call safe: the original code evaluated
    # source.lower() inside format() and raised AttributeError when
    # source was None.
    source = '-' + source.lower() if source else ''
    timestamp = datetime.utcnow().strftime('%Y%m%dT%H%M%S')
    name = pathvalidate.sanitize_filename(name)
    name = slugify.slugify_filename(name)
    filename = 'uspto{source}_{timestamp}_{name}.{format}.zip'.format(
        name=name, timestamp=timestamp, source=source,
        format=format.lower())
    filepath = os.path.join(directory, filename)
    return filepath
def recategorize_oil(session, file_columns, row_data):
    """Move one oil between categories according to a spreadsheet row.

    Looks the oil up by 'adios_oil_id'; on failure the row is logged and
    skipped, otherwise 'remove_from'/'add_to' drive the category changes.
    """
    keys = [slugify_filename(c).lower() for c in file_columns]
    row_dict = dict(zip(keys, row_data))

    query = (session.query(Oil)
             .filter(Oil.adios_oil_id == row_dict['adios_oil_id']))
    try:
        oil_obj = query.one()
    except Exception:
        # No (or more than one) match: report and move on.
        logger.error('Re-categorize: could not query oil {}({})'
                     .format(row_dict['oil_name'],
                             row_dict['adios_oil_id']))
        return

    logger.info('Re-categorizing oil: {}'.format(oil_obj.name))
    remove_from_categories(session, oil_obj, row_dict['remove_from'])
    add_to_categories(session, oil_obj, row_dict['add_to'])
def get_record_date(file_columns, row_data):
    """Extract reference years from one imported row.

    Returns (oil_name, adios_oil_id, comma-joined unique 4-digit years,
    reference).  A missing reference yields 'no-ref'; a reference without
    any 4-digit year yields 'no-date'.
    """
    columns = [slugify_filename(c).lower() for c in file_columns]
    row = dict(zip(columns, row_data))

    reference = row['reference']
    if reference is None:
        reference = ''
        ref_dates = ['no-ref']
    else:
        # Any 4-digit run in the citation text is treated as a year.
        years = re.findall(r'\d{4}', reference)
        ref_dates = years if years else ['no-date']

    return (row['oil_name'], row['adios_oil_id'],
            ', '.join(set(ref_dates)), reference)
def normalize(string):
    """Slug-normalize a name, preserving '+' through slugification.

    Hand-maintained exceptions in CUSTOM_FIXES win outright.  Decoration
    characters are stripped, '+' is protected with a sentinel token (the
    slugifier would otherwise drop it), and a trailing '+' is rendered
    as '-+'.
    """
    if string in CUSTOM_FIXES:
        return CUSTOM_FIXES[string]

    # Strip decoration characters in the same order as before.
    for junk in ('.', '®', '™', '*', '/'):
        string = string.replace(junk, '')
    string = string.replace('+', 'replacewithplus')

    string = slugify_filename(string, to_lower=True)
    string = string.replace('_', '-').replace('replacewithplus', '+')

    if string.endswith('+'):
        string = string[:-1] + '-+'
    return string
def add_oil_object(session, file_columns, row_data):
    """Build an ImportedRecord from one spreadsheet row and persist it.

    Headers are slugified/lowercased into attribute names, a handful of
    fields are fixed up in place, and the record plus its child
    collections are added to the session and committed.
    """
    keys = [slugify_filename(c).lower() for c in file_columns]
    row_dict = dict(zip(keys, row_data))

    # In-place clean-up of the raw values.
    fix_name(row_dict)
    fix_pour_point(row_dict)
    fix_flash_point(row_dict)
    fix_preferred_oils(row_dict)

    oil = ImportedRecord(**row_dict)
    add_synonyms(session, oil, row_dict)

    # Attach every child-table collection derived from the same row.
    for add_children in (add_densities, add_kinematic_viscosities,
                         add_dynamic_viscosities, add_distillation_cuts,
                         add_toxicity_effective_concentrations,
                         add_toxicity_lethal_concentrations):
        add_children(oil, row_dict)

    session.add(oil)
    transaction.commit()
def export_iphoto(args):
    """Copy photos out of an iPhoto library into per-album directories.

    Reads the library's plist index, skips ignored albums (plus the
    synthetic 'allPhotosAlbum' unless --all), and copies each master image
    into '<output>/<slugified album name>/<key>__<filename>'.
    """
    with open(path.join(args.input, XML_FILENAME), 'rb') as fp:
        data = plistlib.load(fp)
    masters = data.get('Master Image List')
    # BUG FIX: the original aliased the module constant
    # (ignored_albums = IGNORED_ALBUMS) and then appended to it, mutating
    # IGNORED_ALBUMS on every call — and its filter read IGNORED_ALBUMS
    # instead of the local.  Copy the list and filter against the copy.
    ignored_albums = list(IGNORED_ALBUMS)
    if not args.all:
        ignored_albums.append('allPhotosAlbum')
    albums = [
        a for a in data['List of Albums'] if a['GUID'] not in ignored_albums
    ]
    for album in tqdm(albums):
        album_directory = path.join(args.output, slugify(album['AlbumName']))
        makedirs(album_directory, exist_ok=True)
        for photo_key in album['KeyList']:
            master_element = masters.get(photo_key, None)
            if master_element is None:
                print(photo_key, 'not in Masters')
                continue
            image_path = master_element.get('ImagePath').split('Masters/')[-1]
            # An absolute remainder points outside the library; skip it.
            if image_path.startswith('/'):
                continue
            image_filename = path.basename(image_path)
            full_source_path = path.join(args.input, 'Masters', image_path)
            full_dest_path = path.join(
                album_directory,
                slugify_filename(photo_key + '__' + image_filename))
            try:
                copyfile(full_source_path, full_dest_path)
            except FileNotFoundError:
                # Best-effort export: a missing master is simply skipped.
                pass
def process_images(self):
    """Localize every <img> in self.soup: download or copy the image,
    resize it, and rewrite its src to the local path, keeping only the
    src/alt attributes."""
    for image in self.soup.find_all("img"):
        local_file = False
        # NOTE(review): str.strip("//") removes ALL leading and trailing '/'
        # characters, not just a protocol-relative '//' prefix — confirm
        # trailing-slash stripping is intended.
        image["src"] = image["src"].strip("//")
        # A dot in the first path segment suggests a bare hostname; assume http.
        if "." in image["src"].split("/")[0]:
            image["src"] = "http://" + image["src"]
        # Still schemeless: resolve relative to the parent document.
        if len(urlparse(image["src"]).scheme) == 0:
            image["src"] = urljoin(self.parent_path, image["src"])
        # self.path set means the source document is a local file, so images
        # are copied from disk rather than downloaded.
        if self.path is not None:
            local_file = True
        # Derive a safe local filename; names without an extension get a
        # timestamp-based prefix to keep them unique.
        local_name = image["src"].split("/")[-1]
        local_name = slugify_filename(local_name)
        if "." not in local_name:
            local_name = "{}.{}".format(int(time.time()*10**5), local_name)
        local_name = local_name.encode("ascii", "ignore").decode("ascii")
        # Only the last 50 characters are kept, bounding the path length.
        local_path = os.path.join(self.img_directory, local_name[-50:])
        if not local_file:
            try:
                # Percent-encode the path so non-ASCII URLs can be fetched.
                u = urlsplit(image["src"])
                u = u._replace(path=quote(u.path.encode('utf8')))
                urllib.request.urlretrieve(u.geturl(), local_path)
            except HTTPError as e:
                local_path = "404"  # sentinel: download failed
        else:
            copyfile(image["src"], local_path)
        image["src"] = local_path
        if image["src"] != "404":
            self.resize_image(image["src"])
        # Drop every attribute except src and alt.
        image.attrs = {k: v for k, v in image.attrs.items()
                       if k in ["src", "alt"]}
def pyup(clipboard, launch, uuid, force, source_file, target):
    """Upload source_file to the target named in targets.ini and echo its URL.

    Optionally renames the remote file to a UUID, slugifies the name when
    awesome-slugify is installed, copies the URL to the clipboard, and/or
    opens it in the browser.  Returns False on configuration errors.
    """
    # app_config = AppConfig(app_name=__app_name__, config_file='pyup.ini')
    # app_config.read_config()
    target_config = TargetConfig(app_name=__app_name__, config_file='targets.ini')
    if not target_config.read_config(target):
        return False
    if not target_config.is_complete():
        click.echo(style_error('Config values are missing.'))
        return False
    # Choose the remote filename: UUID-derived, or the local basename.
    if uuid:
        target_filename = filename_to_uuid(source_file)
    else:
        target_filename = str(os.path.basename(source_file))
    # Slugification is an optional nicety — degrade gracefully without it.
    try:
        from slugify import slugify_filename
        target_filename = slugify_filename(target_filename)
    except ImportError:
        click.echo(style_warning('slugify not available. Skipping! '
                                 '(pip install awesome-slugify if you want to use this feature)'))
    remote_url = upload_file(target_config, source_file, target_filename, force)
    # Keep '~', ':' and '/' readable instead of percent-encoding them.
    remote_url_safe = urllib.parse.quote(remote_url, safe='~:/')
    if clipboard:
        # pyperclip is optional too; warn instead of failing.
        try:
            import pyperclip
            pyperclip.copy(remote_url_safe)
        except ImportError:
            click.echo(style_warning('pyperclip not installed. Unable to copy URL to clipboard.'))
    if launch:
        click.launch(remote_url_safe)
    click.echo(style_success(remote_url_safe))
def on_put(self, req, resp, repo):
    '''
    Get data based on a file object or b64 data, save and commit it
    repo can be repository name or id.
    '''
    cr = req.headers.get('CONTENT-RANGE')
    userInfo = getUserInfoFromSession(req, resp)
    uploader = userInfo.get('id')
    # Resolve the repository: numeric ids are looked up by id, otherwise by
    # name.  Anonymous uploads (no session user) are forced into 'public'.
    try:
        repo = int(repo)
        targetRepo = Repository.query.filter_by(id=repo).scalar()
    except ValueError:
        targetRepo = Repository.query.filter_by(name=repo).scalar()
    if not uploader:
        uploader = -1
        targetRepo = Repository.query.filter_by(name='public').scalar()
    targetUser = User.query.filter_by(id=uploader).scalar()
    if not targetRepo:
        # Unknown repository: look it up once more by name, then create it.
        targetRepo = Repository.query.filter_by(name=repo).scalar()
        if not targetRepo:
            targetRepo = Repository(
                name=repo, path=os.path.join(public_repository_path, repo))
            req.session.add(targetRepo)
    ''' When client sends md5, it means that there is probabaly an exsisting file with that md5 So we server doesnt need file data. Just need to link old data '''
    _md5 = req.get_param('md5')
    tags = []
    _tags = req.get_param('tags')
    if _tags:
        tags = [tag.strip() for tag in _tags.split(',')]
    # Resolve the target collection by id, then by name; fall back to a
    # per-user daily collection '<alias>.<YYYY-MM-DD>', created on demand.
    _cid = req.get_param('collection_id')
    collection = None
    if _cid:
        collection = Collection.query.filter_by(repository=targetRepo)\
            .filter_by(id=_cid).scalar()
    _cname = req.get_param('collection')
    if _cname:
        collection = Collection.query.filter_by(repository=targetRepo)\
            .filter_by(name=_cname).scalar()
    if not collection:
        now = arrow.utcnow()
        today = now.format('YYYY-MM-DD')
        cpath = '%s.%s' % (userInfo.get('alias'), today)
        collection = Collection.query.filter_by(path=cpath)\
            .filter_by(repository=targetRepo).scalar()
        if not collection:
            collection = Collection(path=cpath, repository=targetRepo)
            req.session.add(collection)
    # An optional 'subpath' creates/walks nested sub-collections; the last
    # path segment is dropped (it is the filename part).
    _subpath = req.get_param('subpath')
    if _subpath:
        sub_collections = _subpath.strip().split('/')[:-1]
        _scp = None
        for sc in sub_collections:
            scdb = Collection.query.filter_by(repository=targetRepo)\
                .filter_by(parent=_scp or collection)\
                .filter_by(name=sc).first()
            if not scdb:
                scdb = Collection(path=sc,
                                  parent=_scp or collection,
                                  repository=targetRepo)
                req.session.add(scdb)
                req.session.flush()
            _scp = scdb
        if sub_collections:
            collection = scdb  ## set collection to last subpath folder
    # Payload can arrive as a raw stream, base64 data, or multipart form.
    body = req.stream
    b64 = req.get_param('b64')
    thumbnail = req.get_param('thmb')
    mt = req.get_param('multipart')
    mtname = None
    if mt:
        try:
            fs = cgi.FieldStorage(fp=req.stream, environ=req.env)
        except (ValueError, IOError):
            resp.status = falcon.HTTP_400
            resp.body = {'message': 'Error in myltipart data'}
            return
        _cgi_data = fs['files[]']
        body = _cgi_data.file
        if fs.has_key('thumbnail'):
            # thumbnails are dataURLs
            thumbnail = fs['thumbnail'].file.read()
        mtname = _cgi_data.filename
    attach_to = req.get_param('attach_to')
    if targetRepo and (body or _md5):
        # Pick a filename: multipart name, explicit 'name' param, or a
        # generated placeholder; slugify it for the filesystem.
        if not mtname:
            name = req.get_param(
                'name') or 'undefined.%s.raw' % _generate_id()
        else:
            name = mtname
        if name:
            name = slugify_filename(name)
            name = name.decode('utf-8')
        assetExt = name.split('.')[-1]
        content_type = contenttype(name)
        assetPath = name
        tempraryStoragePath = path.join(targetRepo.path, collection.path, name)
        #name = os.path.basename(tempraryStoragePath)
        if _md5:
            # Deduplicated upload: symlink to the already-stored content.
            availableAsset = Asset.query.filter_by(key=_md5).join(
                Collection).filter_by(repository_id=targetRepo.id).first()
            if availableAsset:
                # create folder if not available
                checkPath(os.path.dirname(tempraryStoragePath))
                if os.path.isfile(tempraryStoragePath):
                    os.remove(tempraryStoragePath)
                os.symlink(availableAsset.full_path, tempraryStoragePath)
                bodyMd5 = _md5
            else:
                resp.status = falcon.HTTP_404
                return
        else:
            if body:
                bodyMd5 = safeCopyAndMd5(
                    req, body, tempraryStoragePath, targetRepo.id,
                    targetUser, b64=b64, content_range=cr)
                # in uploading progress
                if bodyMd5 in ['IN_PROGRESS', 'IN_PROGRESS_NEW']:
                    resp.body = {'info': bodyMd5}
                    resp.status = falcon.HTTP_206
                    return
            else:
                resp.status = falcon.HTTP_204
                return
        # Display name is truncated; the full name is kept on the asset.
        fullname = name
        name = (name[:10] + '..') if len(name) > 10 else name
        asset = Asset.query.filter(
            Asset.repository == targetRepo).filter_by(collection=collection)\
            .filter_by(fullname=fullname).scalar()
        resp.status = falcon.HTTP_200
        if not asset:
            _uid = getUUID()
            asset = Asset(key=bodyMd5, version=1, repository=targetRepo,
                          uuid=_uid, collection=collection, name=name,
                          fullname=fullname, path=assetPath, ext=assetExt,
                          owner_id=targetUser.id)
            req.session.add(asset)
            resp.status = falcon.HTTP_201
        else:
            # Same path, different content: bump the version.
            if not bodyMd5 == asset.key:
                asset.version += 1
            asset.name = name
            asset.fullname = fullname
            asset.key = bodyMd5
            # Asset descriptionspath
            asset.path = assetPath
        if req.get_param('description'):
            asset.description = req.get_param('description')
        if targetUser:
            asset.modifiers.append(targetUser)
            asset.users.append(targetUser)
        if thumbnail:
            # thumbnail is base64 format
            fmt = 'png'
            fid = asset.uuid + '_thmb_' + str(asset.version)
            result = os.path.join('uploads', fid + '.' + fmt)
            thmbpath = os.path.join(public_upload_folder, fid + '.' + fmt)
            thmb_data = decodestring(unquote(thumbnail).split(',')[1])
            with open(thmbpath, 'wb') as f:
                f.write(thmb_data)
        if attach_to:
            parent_id = int(attach_to)
            parent = Asset.query.filter_by(id=parent_id).scalar()
            asset.attached_to.append(parent)
        task_id = req.get_param('task_id')
        if task_id:
            target_task = Task.query.filter_by(id=task_id).scalar()
            target_task.assets.append(asset)
        if tags:
            asset.tags += tags
            collection.tags += tags
        req.session.flush()
        resp.body = {'key': asset.key, 'id': asset.id, 'url': asset.url,
                     'fullname': asset.fullname, 'uuid': asset.uuid,
                     'name': asset.name,
                     'content_type': asset.content_type.split('/')[0],
                     'datetime': time.time()}
        #resp.body = "I am working"
    else:
        # lets consume the stream!
        while True:
            chunk = req.stream.read(2 ** 22)
            if not chunk:
                break
        resp.status = falcon.HTTP_400
        resp.body = {'message': 'Something Wrong!'}
def test_slugify_filename(self):
    # Cyrillic letters are transliterated, '№' is dropped, the space becomes
    # an underscore, and the '.txt' extension is preserved.
    self.assertEqual(slugify_filename(u'Дrаft №2.txt'), u'Draft_2.txt')
def request_submit():
    """Handle a new request submission: validate input, optionally store an
    uploaded image and notify Telegram, then insert the row into the
    'requests' table."""
    props = {
        'currentPage': 'requests',
        'redirect': request.args.get('Referer') if request.args.get('Referer') else '/requests'
    }
    # Prefer the last hop of X-Forwarded-For when behind a proxy.
    ip = request.headers.getlist("X-Forwarded-For")[0].rpartition(
        ' '
    )[-1] if 'X-Forwarded-For' in request.headers else request.remote_addr
    if not request.form.get('user_id'):
        props['message'] = 'You didn\'t enter a user ID.'
        return make_response(render_template('error.html', props=props), 400)
    # Optional Telegram notification, enabled by TELEGRAMTOKEN.
    if getenv('TELEGRAMTOKEN'):
        snippet = ''
        with open('views/requests_new.html', 'r') as file:
            snippet = file.read()
        requests.post('https://api.telegram.org/bot' + getenv('TELEGRAMTOKEN') + '/sendMessage', params={
            'chat_id': '-' + getenv('TELEGRAMCHANNEL'),
            'parse_mode': 'HTML',
            'text': render_template_string(snippet)
        })
    filename = ''
    try:
        # Optional image attachment: slugified name, size-limited
        # (REQUESTS_IMAGES bytes, default 1 MiB), de-duplicated with a
        # '-N' suffix, then moved from /tmp into DB_ROOT/requests/images.
        if 'image' in request.files:
            image = request.files['image']
            if image and image.filename and allowed_file(
                    image.content_type, ['png', 'jpeg', 'gif']):
                filename = original = slugify_filename(
                    secure_filename(image.filename))
                tmp = join('/tmp', filename)
                image.save(tmp)
                limit = int(getenv('REQUESTS_IMAGES')) if getenv(
                    'REQUESTS_IMAGES') else 1048576
                if stat(tmp).st_size > limit:
                    abort(413)
                makedirs(join(getenv('DB_ROOT'), 'requests', 'images'),
                         exist_ok=True)
                store = join(getenv('DB_ROOT'), 'requests', 'images', filename)
                copy = 1
                while isfile(store):
                    filename = splitext(original)[0] + '-' + str(
                        copy) + splitext(original)[1]
                    store = join(getenv('DB_ROOT'), 'requests', 'images',
                                 filename)
                    copy += 1
                move(tmp, store)
    except Exception as error:
        props['message'] = 'Failed to upload image. Error: {}'.format(error)
        return make_response(render_template('error.html', props=props), 500)
    # Sanitize user text (only <br> survives in the description) and build
    # a parameterized INSERT; the IP is stored as a SHA-256 hash.
    scrub = Cleaner(tags=[])
    text = Cleaner(tags=['br'])
    columns = ['service', '"user"', 'title', 'description', 'price', 'ips']
    description = request.form.get('description').strip().replace(
        '\n', '<br>\n')
    params = (scrub.clean(request.form.get('service')),
              scrub.clean(request.form.get('user_id').strip()),
              scrub.clean(request.form.get('title').strip()),
              text.clean(description),
              scrub.clean(request.form.get('price').strip()),
              [sha256(ip.encode()).hexdigest()])
    if request.form.get('specific_id'):
        columns.append('post_id')
        params += (scrub.clean(request.form.get('specific_id').strip()), )
    if filename:
        columns.append('image')
        params += (join('/requests', 'images', filename), )
    data = ['%s'] * len(params)
    query = "INSERT INTO requests ({fields}) VALUES ({values})".format(
        fields=','.join(columns), values=','.join(data))
    cursor = get_cursor()
    cursor.execute(query, params)
    return make_response(render_template('success.html', props=props), 200)