def parse(query):
    if query == "":
        return File.select()
    tokens = tokenize(query)
    result = _parse(iter(tokens))
    try:
        iter(result)
    except TypeError:
        # A bare expression came back; wrap it into a full query.
        result = File.select().join(Metadata, peewee.JOIN_LEFT_OUTER).where(result)
    return result
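# Usage sketch (hypothetical, not part of the original module): parse()
# returns a peewee query in every case, so callers can iterate the result
# directly. The 'field:value' token syntax is assumed from Token() below.
#
#     for f in parse('tag:vacation'):
#         print(f.file_name)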
def get_entries():
    """Get all file names currently in the catalog."""
    return [item.file_name for item in File.select()]
def process(args, pool, log):
    files = File.select().where(File.inspected_at.is_null()).execute()
    paths = [pathlib.Path(f.path) for f in files]
    count_total = file_count_total(paths)
    size_total = file_size_total(paths)
    parse = functools.partial(record_for, args)
    count_processed = 0
    size_processed = 0
    # for (path, result, messages) in map(parse, input_iterator(args)):
    for (path, result, messages) in pool.imap(parse, paths):
        count_processed += 1
        size_processed += file_size(path)
        for message in messages:
            print('error: {}: {}'.format(message, path))
        if result is None:
            mark_processed(path)
            continue
        save_media(result)
        print('process: {} {} {}/{} {}/{}'.format(
            result['source'], result['path'],
            count_processed, count_total,
            humanize.naturalsize(size_processed),
            humanize.naturalsize(size_total)))
def view_all_entries():
    """Gather all entries in the catalog, along with their metadata."""
    all_entries = (FileTag
                   .select(FileTag, File, Tag)
                   .join(Tag)
                   .switch(FileTag)
                   .join(File)
                   .order_by(File.file_name))

    # Dictionary mapping each file in the catalog to its associated tags.
    files_with_tags = {}
    for file_tag in all_entries:
        f = file_tag.file_id
        tag = file_tag.tag_id.tag_name
        files_with_tags.setdefault(f.file_name, []).append(tag)

    ordered_files = File.select().order_by(File.file_name)

    # Build a list of tuples called 'entries', each containing the name of a
    # catalog file and its metadata.
    entries = []
    for f in ordered_files:
        tags = ' | '.join(sorted(files_with_tags.get(f.file_name, [])))
        entries.append((f.file_name, f.description, f.date_created, tags))
    return entries
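# A minimal consumption sketch (hypothetical, not in the original):
# view_all_entries() returns (file_name, description, date_created, tags)
# tuples, so a plain-text listing is one line per entry.
#
#     for name, description, created, tags in view_all_entries():
#         print(f'{name}  {created}  [{tags}]  {description}')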
def Token(token):
    if ':' in token:
        field, value = token.split(':', 1)
    else:
        field, value = 'tag', token
    files = File.select().join(Metadata, peewee.JOIN_LEFT_OUTER)
    return set(files.where((Metadata.field == field) & (Metadata.value == value)))
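# Token() treats a bare word as an implicit 'tag' field, so (assuming the
# matching Metadata rows exist) these two calls return the same set of files:
#
#     Token('vacation') == Token('tag:vacation')
#
# while Token('date:2019') instead matches Metadata.field == 'date'.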
def get(self):
    obj = {
        'categories': Category.select()[:],
        'items': Item.select()[:],
        'files': File.select()[:]
    }
    # Note: exclude must be a tuple; ('blob') is just a string.
    return json_response(obj, exclude=('blob',))
def fetch_me_file_count(args):
    if args.authorization is None and args.fingerprint is None:
        raise ApiError(status=401)
    if args.mimetype is not None and args.type is not None:
        raise ApiError('Choose Mimetype or Type, not both')

    mtype = None
    if args.type is not None:
        mtype = MimetypeType.get_or_none(id=args.type)
        if mtype is None:
            raise ApiError(metadata={'errors': {'type': 'Invalid Type'}})

    mimetype = None
    if args.mimetype is not None:
        mimetype = Mimetype.get_or_none(id=args.mimetype)
        if mimetype is None:
            raise ApiError(metadata={'errors': {'mimetype': 'Invalid Mimetype'}})

    if args.authorization is not None:
        user = helpers.get_user(args.authorization)
        query = File.select().where(File.user == user)
    else:
        query = File.select().where(File.fingerprint == args.fingerprint)

    if mtype is not None:
        query = (query
                 .join(Mimetype, JOIN.RIGHT_OUTER, on=(Mimetype.id == File.mimetype))
                 .join(MimetypeType, JOIN.RIGHT_OUTER, on=(MimetypeType.id == Mimetype.type))
                 .where(MimetypeType.id == mtype)
                 .switch(File))
    if mimetype is not None:
        query = query.where(File.mimetype == mimetype)

    return ApiResponse({'count': query.count()})
def handle_T_key(self):
    # Get the File objects corresponding to the selected thumbnails.
    files = File.select().where(
        File.md5 << [item.Text for item in self.get_selected_thumbs()])

    # Determine the existing tags for these files.
    old_tags = Metadata.filter(Metadata.file << files,
                               Metadata.field.not_in(['import-time']))
    old_tags = sorted(set('%s:"%s"' % (t.field, t.value) for t in old_tags))

    dialog = wx.TextEntryDialog(None, "Tags:", "Modify Tags",
                                value=", ".join(old_tags))
    if dialog.ShowModal() == wx.ID_OK:
        # Determine the new tags for these files.
        new_tags = [t.strip() for t in dialog.GetValue().split(",")]

        # Add any tags that have been added.
        for token in set(new_tags) - set(old_tags):
            # Determine the actual field and value.
            if ':' in token:
                field, value = token.split(':', 1)
            else:
                field, value = 'tag', token
            value = value.strip('"')

            # Create records for all selected files.
            for file in files:
                try:
                    Metadata(file=file, field=field, value=value).save()
                except IntegrityError:
                    pass

        # Remove any tags that were removed.
        for token in set(old_tags) - set(new_tags):
            # Determine the actual field and value.
            if ':' in token:
                field, value = token.split(':', 1)
            else:
                field, value = 'tag', token
            value = value.strip('"')

            # Delete the records for all selected files.
            Metadata.delete().where(Metadata.file << files,
                                    Metadata.field == field,
                                    Metadata.value == value).execute()

    # Repaint the tag list.
    self.update_tags()
def handle_backspace_key(self):
    confirmDialog = wx.MessageDialog(
        self,
        "Remove %s Files?" % self.thumbnailGrid.SelectedItemCount,
        "Remove Files?",
        style=wx.OK | wx.CANCEL)
    confirmDialog.SetOKLabel("Delete")
    if confirmDialog.ShowModal() == wx.ID_OK:
        files = File.select().where(
            File.md5 << [item.Text for item in self.get_selected_thumbs()])
        for file in files:
            file.delete_instance(recursive=True)

        # Repaint the tag list.
        self.update_tags()
        self.update_thumbnails()
        self.preview = None
def folder(folder_name):
    try:
        f = Folder.get(name=folder_name)
    except peewee.DoesNotExist:
        return jsonify(message='error'), 404

    if request.method == 'POST':
        file = request.files['file']
        if file:
            actual_filename = secure_filename(folder_name + '_' + file.filename)
            if os.path.exists(os.path.join(app.config['UPLOAD_FOLDER'], actual_filename)):
                return jsonify(message='error'), 409
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], actual_filename))
            f2 = File.create(folder=folder_name,
                             filename=file.filename,
                             public_share_url=generate_url(),
                             private_share_url=generate_url(),
                             private_share_password=generate_password(),
                             open_public_share=False,
                             open_private_share=False)
            f2.save()
            return jsonify(message='OK'), 201

    if request.method == 'GET':
        files = File.select().where(File.folder == folder_name)
        items = [{
            'filename': x.filename,
            'public': x.public_share_url,
            'private': x.private_share_url,
            'password': x.private_share_password,
            'openPublic': x.open_public_share,
            'openPrivate': x.open_private_share
        } for x in files]
        return jsonify(message='OK', items=items)

    if request.method == 'DELETE':
        try:
            f.delete_instance()
        except peewee.IntegrityError:
            return jsonify(message='error'), 409
        return jsonify(message='OK')
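# Hedged usage sketch for the route above. The URL rule is an assumption --
# the app.route() registration is not shown here -- but the method handling
# implies something like '/folder/<folder_name>' with GET, POST and DELETE:
#
#     import requests
#     base = 'http://localhost:5000/folder/docs'
#     requests.post(base, files={'file': open('report.pdf', 'rb')})  # upload
#     requests.get(base).json()['items']                             # list files
#     requests.delete(base)                                          # delete folder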
def __init__(self, parent):
    super(type(self), self).__init__(parent)
    self.add_dir = "~/Pictures"
    self._preview = None  # Track the currently selected image, by md5.
    self._filter = ""     # Track the current filter.
    self.filters = []     # Track previous filters.

    # Only update the preview when we stop changing the list selection.
    self.list_change_timer = wx.Timer(self)

    # Load the grid.
    self.thumbnail_index = {}
    self.thumbnails = wx.ImageList()
    self.thumbnailGrid.SetImageList(self.thumbnails, wx.IMAGE_LIST_NORMAL)
    for f in File.select():
        self.thumbnail_index[f.md5] = self.thumbnails.Add(f.as_bitmap())
    self.update_thumbnails()
    self.SetStatusText("Matching Images: %s" % self.thumbnailGrid.ItemCount)
    self.update_tags()
def NOT(token):
    return set(File.select().where(File.id.not_in([f.id for f in token])))
def get_files_with_point(point):
    query = (File.select()
             .where(File.point == point)
             .order_by(File.order_num))
    return query
def fetch_files(args):
    if args.authorization is None and args.fingerprint is None:
        raise ApiError(status=401)
    if args.after is not None and args.before is not None:
        raise ApiError('Choose between before or after, not both')
    if args.mimetype is not None and args.type is not None:
        raise ApiError('Choose Mimetype or Type, not both')

    mtype = None
    if args.type is not None:
        mtype = MimetypeType.get_or_none(id=args.type)
        if mtype is None:
            raise ApiError(metadata={'errors': {'type': 'Invalid Type'}})

    mimetype = None
    if args.mimetype is not None:
        mimetype = Mimetype.get_or_none(id=args.mimetype)
        if mimetype is None:
            raise ApiError(metadata={'errors': {'mimetype': 'Invalid Mimetype'}})

    files = (File.select(File, FileHash)
             .join(FileHash)
             .switch(File)
             .order_by(File.id.desc())
             .limit(args.limit))
    total = File.select(fn.COUNT(File.id).alias('count'))

    if args.authorization is not None:
        user = helpers.get_user(args.authorization)
        files = files.where(File.user == user)
        total = total.where(File.user == user)
    else:
        files = files.where(File.fingerprint == args.fingerprint)
        total = total.where(File.fingerprint == args.fingerprint)

    if mtype is not None:
        files = (files
                 .join(Mimetype, JOIN.RIGHT_OUTER, on=(Mimetype.id == File.mimetype))
                 .join(MimetypeType, JOIN.RIGHT_OUTER, on=(MimetypeType.id == Mimetype.type))
                 .where(MimetypeType.id == mtype)
                 .switch(File))
        total = (total
                 .join(Mimetype, JOIN.RIGHT_OUTER, on=(Mimetype.id == File.mimetype))
                 .join(MimetypeType, JOIN.RIGHT_OUTER, on=(MimetypeType.id == Mimetype.type))
                 .where(MimetypeType.id == mtype)
                 .switch(File))
    if mimetype is not None:
        files = files.where(File.mimetype == mimetype)
        total = total.where(File.mimetype == mimetype)

    if args.after is not None:
        files = files.where(File.id > args.after)
    elif args.before is not None:
        files = files.where(File.id < args.before)

    return ApiResponse({
        'total': total.scalar(),
        'files': [x.to_dict() for x in files],
    })
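# Note on the pagination contract above: 'after' and 'before' are mutually
# exclusive cursors over File.id, and results come back newest-first, so a
# client pages backwards by feeding the smallest id it has seen into
# 'before'. A sketch of that loop; make_args() is a hypothetical stand-in
# for building the request arguments, and extracting the 'files' list from
# the ApiResponse payload is assumed:
#
#     before = None
#     while True:
#         page_files = fetch_files(make_args(limit=50, before=before))  # extract 'files'
#         if not page_files:
#             break
#         before = min(f['id'] for f in page_files)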
def run(self, i=1):
    log.info("Starting tag process.")
    files = File.select(title_word_count=None,
                        word_count=None).order_by(File.publish_date)
    rnn = rnntagger.RnnTagger("german")
    log.debug(f"Found {len(files)} files to tag.")
    for file in files:
        log.debug(f"Tagging file {i}")
        file_start = time.time()
        link = file.link
        if file.insert_date < datetime.now() - timedelta(days=365):
            continue
        if not link.startswith('http://') and not link.startswith('https://'):
            log.info(f"Invalid link given. Skipping. {link}")
            continue
        try:
            p = parser.Parser(link)
            text = p.get_text()
            size = int(p.get_size())
            pages = int(p.get_pages())
        except Exception as e:
            log.info(e)
            continue
        log.debug(f"Size: {size}")

        # Tagging takes roughly 40 seconds.
        keywords_content = []
        try:
            log.info(f"Start tagging in {len(text) + 1} steps.")
            for t in text:
                keywords_content += rnn.tag(t)
                log.debug("Next step.")
            keywords_title = rnn.tag(file.title)
        except rnntagger.TaggerError as e:
            log.info(f"Skipping. Tagging failed with: {e}")
            continue

        log.debug("Sorting keywords")
        keywords_content_sorted = self.sort(keywords_content)
        keywords_title_sorted = self.sort(keywords_title)

        log.debug(f"Found {len(keywords_content)} words. Start inserting them.")
        tot_words = 0
        for word, tags in keywords_content_sorted.items():
            for tag, word_count in tags.items():
                k_id = self._get_keyword_id(word, tag)
                if k_id == -1:
                    continue
                FileKeywordContent(file_id=file.id, keyword_id=k_id,
                                   word_count=word_count)
                tot_words += word_count

        tot_title = 0
        log.debug(f"Found {len(keywords_title)} words. Start inserting them.")
        for word, tags in keywords_title_sorted.items():
            for tag, word_count in tags.items():
                k_id = self._get_keyword_id(word, tag)
                if k_id == -1:
                    continue
                FileKeyword(file_id=file.id, keyword_id=k_id,
                            word_count=word_count)
                tot_title += word_count

        file.file_size = size
        file.word_count = tot_words
        file.title_word_count = tot_title
        file.pages = pages
        log.debug(f"File DUR: {time.time() - file_start}")
        i += 1
        if i == 5:
            log.debug("Flushing session to avoid data loss.")
            self.run(i)
    log.info("Finished tagging. Nothing left to do.")
    return True
def HAS(_, token):
    return set(File.select()
               .join(Metadata, peewee.JOIN_LEFT_OUTER)
               .where(Metadata.field == token))
def run(self, files=0):
    if self.offset > 150:
        with db_session:
            this_scrape = Scrape.select(status="started").order_by(desc(Scrape.start)).first()
            if this_scrape is not None:
                this_scrape.status = "finished"
                this_scrape.end = datetime.now()
                this_scrape.processed_files = files
        log.info("Too far back in time. Stopping.")
        return

    try:
        page = self.get_page()
    except resource_error.ResourceError as e:
        log.error(e)
        return

    with db_session:
        try:
            last_scrape = Scrape.select(status="finished").order_by(desc(Scrape.start)).first()
            runtil_date = last_scrape.end - timedelta(days=1)
        except Exception:
            last_scrape = None
            runtil_date = datetime.now() - timedelta(days=30)
        if self.offset == 0:
            this_scrape = Scrape(start=datetime.now(), status="started")

    for entry in page.split('<hr class="col-xs-12 divider divider-small">'):
        log.debug(f"Next file ({files}) ...")
        try:
            with db_session:
                soup = BeautifulSoup(entry, 'html.parser')
                title = soup.a.get_text()
                if len(title) > 160:
                    short_title = title[:157] + "..."
                else:
                    short_title = title

                # End of page or invalid title.
                if "Weitere Dokumente anzeigen" in title or len(title) < 5:
                    log.debug("EOF or title invalid. Skipping.")
                    continue

                try:
                    info = soup.find_all('li')
                    publish_date = datetime.strptime(
                        info[1].getText().replace("Datum: ", "", 1), "%d.%m.%Y")
                    number = info[0].getText()
                    type = info[2].getText().replace("Art: ", "", 1)
                    author = info[3].getText().replace("Urheber: ", "", 1)
                    link = 'https://www.landtag-bw.de' + soup.a.get('href')  # TODO move base url to config
                    insert_date = datetime.now()
                except Exception as e:
                    log.info(f"Could not parse file. Skipping. {e}")
                    continue

                if publish_date < runtil_date:
                    log.info("Reached the end of the last scrape; finished.")
                    this_scrape = Scrape.select(status="started").order_by(desc(Scrape.start)).first()
                    if this_scrape is not None:
                        this_scrape.status = "finished"
                        this_scrape.end = datetime.now()
                        this_scrape.processed_files = files
                        this_scrape.flush()
                    return

                # Check whether the file already exists.
                file_ = File.select(title=title, number=number).first()
                if file_ is not None:
                    # TODO update file in database
                    log.info(f"Already have file \"{title}\" in database. Skipping.")
                    continue

                file = File(title=title, title_short=short_title, number=number,
                            publish_date=publish_date, type=type, author=author,
                            insert_date=insert_date, link=link)
                files += 1
        except Exception as e:
            log.error(f"Fatal error, file might not be saved: {e}")

    log.debug("EOF. Getting next page.")
    self.offset += 30
    self.run(files=files)
def UNTAGGED():
    tagged_files = Metadata.select(Metadata.file).where(Metadata.field != 'import-time')
    return set(File.select().where(File.id.not_in(tagged_files)))
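# NOT(), HAS() and UNTAGGED() all return plain Python sets of File rows, so
# the query combinators compose with ordinary set algebra (a sketch, assuming
# matching Metadata rows exist):
#
#     dated = HAS(None, 'date')        # files carrying any 'date' metadata
#     undated = NOT(dated)             # complement over all files
#     orphans = UNTAGGED() & undated   # files with no tags and no date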